"""Streamlit chat interface backed by a local llama.cpp model.

The script loads a quantized GGUF model once per server process, keeps the
conversation in ``st.session_state.messages`` and, on every user prompt,
streams a completion built from the most recent messages of the chat.
"""

import streamlit as st
from llama_cpp import Llama

# Number of most recent chat messages (the new user message included) that
# are rendered into the model prompt.
CONTEXT_MESSAGES = 3

st.set_page_config(page_title="Chat with AI", page_icon="🤖", layout="wide")

# Custom CSS for better styling (the stylesheet is currently empty).
st.markdown(""" """, unsafe_allow_html=True)


@st.cache_resource
def load_model():
    """Load the GGUF model and return the ``Llama`` instance.

    Decorated with ``st.cache_resource`` so the model is created once and
    reused across Streamlit reruns instead of being reloaded each time.
    """
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        filename="*Q4_K_M.gguf",
        verbose=False,
        n_ctx=512,
        n_batch=512,
        n_threads=8,
        use_mlock=True,
        use_mmap=True,
        # Alternative settings kept for reference:
        # n_ctx=256,  # Reduced context window
        # n_batch=8,  # Smaller batch size
        # n_threads=2,  # Adjust based on your CPU cores
        # use_mmap=True,
    )


llm = load_model()


def format_context(messages):
    """Render chat messages as a plain-text transcript.

    Args:
        messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        One line per message, each terminated by a newline. Messages whose
        role is ``"user"`` are prefixed with ``Human:``; every other role is
        prefixed with ``Assistant:``. An empty input yields ``""``.
    """
    return "".join(
        f"{'Human' if message['role'] == 'user' else 'Assistant'}: "
        f"{message['content']}\n"
        for message in messages
    )


# Sidebar
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")

# # Add useful information to the sidebar
# st.sidebar.header("How to use")
# st.sidebar.markdown("""
# 1. Type your question in the chat input box at the bottom of the screen.
# 2. Press Enter or click the Send button to submit your question.
# 3. The AI will generate a response based on your input.
# 4. You can have a continuous conversation by asking follow-up questions.
# """)

st.sidebar.header("Model Information")
st.sidebar.markdown("""
- Model: med_phi3-mini-4k-GGUF ([View on Hugging Face](https://huggingface.co/Mykes/med_phi3-mini-4k-GGUF))
- Context Length: 512 tokens
- This model is specialized in medical knowledge.
- Russian language
""")

st.sidebar.header("Tips")
st.sidebar.markdown("""
- Be clear and specific in your questions.
- For medical queries, provide relevant details.
- Remember that this is an AI model and may not always be 100% accurate.
""")

# Main chat interface
st.title("Chat with AI")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # The history already ends with the new user message, so the transcript
    # of the last CONTEXT_MESSAGES entries only needs the trailing
    # "Assistant:" cue. Appending the prompt again would send it twice.
    context = format_context(st.session_state.messages[-CONTEXT_MESSAGES:])
    model_input = f"{context}Assistant:"

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        for chunk in llm(
            model_input,
            max_tokens=None,
            # Stop when the model starts writing the next user turn. An empty
            # stop string must not be listed: it matches any output.
            stop=["Human:"],
            # Only the newly generated text is wanted, so the prompt is not
            # echoed and no post-hoc splitting on "Assistant:" is required.
            echo=False,
            stream=True,
        ):
            full_response += chunk["choices"][0]["text"]
            # Show a cursor glyph while tokens are still arriving.
            message_placeholder.markdown(full_response + "▌")

        assistant_response = full_response.strip()
        message_placeholder.markdown(assistant_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    # st.experimental_rerun is deprecated and missing from newer Streamlit
    # releases; use st.rerun when available and fall back otherwise.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()

# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")

# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")