import os
import random
import re

import streamlit as st
from datasets import load_dataset
from gtts import gTTS
from transformers import AutoModelForCausalLM, AutoTokenizer

# Enable dark mode and custom CSS.
# A minimal dark-theme placeholder; adjust the rules to taste.
st.markdown(
    """
    <style>
    .stApp { background-color: #0e1117; color: #fafafa; }
    </style>
    """,
    unsafe_allow_html=True,
)

# Load the model and tokenizer once and cache them across Streamlit reruns,
# so the 7B model is not reloaded on every interaction.
@st.cache_resource
def load_code_llama():
    model = AutoModelForCausalLM.from_pretrained("meta-llama/CodeLlama-7B-Python")
    tokenizer = AutoTokenizer.from_pretrained("meta-llama/CodeLlama-7B-Python")
    return model, tokenizer

try:
    code_llama_model, code_llama_tokenizer = load_code_llama()
except Exception as e:
    st.error(f"Error loading model: {e}")
    code_llama_model = None
    code_llama_tokenizer = None

# Load the wordlist dataset, also cached across reruns.
@st.cache_resource
def load_wordlists():
    return load_dataset("Canstralian/Wordlists")

try:
    wordlist_dataset = load_wordlists()
except Exception as e:
    st.error(f"Error loading Wordlist dataset: {e}")
    wordlist_dataset = None

# Initialize chat history storage.
if "messages" not in st.session_state:
    st.session_state.messages = [
        {"role": "assistant", "content": "How may I assist you?"}
    ]


def validate_prompt(prompt: str) -> bool:
    """
    Validates that the input prompt is non-empty and contains only
    alphanumeric characters, whitespace, and common punctuation.

    Args:
        prompt (str): The input prompt to be validated.

    Returns:
        bool: True if the prompt is valid, False otherwise.
    """
    return bool(re.match(r'^[A-Za-z0-9\s\.,;!?(){}[\]]+$', prompt))


def text_to_speech(text: str) -> None:
    """
    Converts text to speech using gTTS, saves it as an MP3 file, and plays it.

    Args:
        text (str): The text to be converted to speech.
    """
    try:
        tts = gTTS(text, lang="en")
        tts.save("response.mp3")
        # Requires the external mpg321 player to be installed and on PATH.
        os.system("mpg321 response.mp3")
    except Exception as e:
        st.error(f"Error generating speech: {e}")


def generate_response(prompt: str) -> str:
    """
    Generates a response from the assistant based on the user input.

    Args:
        prompt (str): The user's input prompt.

    Returns:
        str: The generated response from the assistant.
    """
    lowered = prompt.lower()

    # Topic-specific handlers take precedence over the language model.
    if "osint" in lowered:
        # Respond with dataset-based OSINT information.
        return "OSINT data analysis coming soon!"
    if "wordlist" in lowered and wordlist_dataset:
        # Fetch and return a random entry from the Wordlist dataset.
        wordlist_entry = random.choice(wordlist_dataset["train"])["text"]
        return f"Here's a random wordlist entry: {wordlist_entry}"

    if code_llama_model and code_llama_tokenizer:
        if "python" in lowered:
            # Use the Code Llama model for code-related queries.
            inputs = code_llama_tokenizer(prompt, return_tensors="pt")
            outputs = code_llama_model.generate(
                **inputs, max_length=150, num_return_sequences=1
            )
            return code_llama_tokenizer.decode(outputs[0], skip_special_tokens=True)
        return "I'm here to assist with your queries."

    return "Model not loaded. Please try again later."


# Display the chat history using native Streamlit components.
for message in st.session_state.messages:
    if message["role"] == "user":
        st.markdown(f"**You:** {message['content']}")
    elif message["role"] == "assistant":
        st.markdown(f"**Assistant:** {message['content']}")

# Handle user input. chat_input clears itself after each submission, so
# reruns do not re-append the same message to the history.
prompt = st.chat_input("Your message:", key="chat_input")
if prompt:
    # Validate user input before processing it.
    if validate_prompt(prompt):
        st.session_state.messages.append({"role": "user", "content": prompt})
        st.markdown(f"**You:** {prompt}")

        # Generate the assistant's response.
        with st.spinner("Assistant is typing..."):
            response = generate_response(prompt)
        st.markdown(f"**Assistant:** {response}")

        # Store the assistant's response in the chat history.
        st.session_state.messages.append({"role": "assistant", "content": response})

        # Speak the assistant's response aloud.
        text_to_speech(response)
    else:
        st.warning("Invalid input. Please ensure your input contains only valid characters.")

# User feedback section.
feedback = st.selectbox("How was your experience?", ["😊 Excellent", "😐 Okay", "😕 Poor"])
if feedback:
    st.success(f"Thank you for your feedback: {feedback}", icon="✅")