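# Streamlit chat UI for a Russian medical assistant backed by the
# Mykes/med_phi3-mini-4k-GGUF model, run locally through llama-cpp-python.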
import streamlit as st
from llama_cpp import Llama
st.set_page_config(page_title="russian medical phi3-mini", page_icon="🤖", layout="wide")
# Custom CSS for better styling
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.chat-message {
    padding: 1.5rem;
    border-radius: 0.5rem;
    margin-bottom: 1rem;
    display: flex;
}
.chat-message.user {
    background-color: #2b313e;
}
.chat-message.bot {
    background-color: #475063;
}
.chat-message .avatar {
    width: 20%;
}
.chat-message .avatar img {
    max-width: 78px;
    max-height: 78px;
    border-radius: 50%;
    object-fit: cover;
}
.chat-message .message {
    width: 80%;
    padding: 0 1.5rem;
    color: #fff;
}
.chat-message.user::before {
    content: "👤";
    font-size: 1.5em;
    margin-right: 0.5em;
}
.chat-message.bot::before {
    content: "🤖";
    font-size: 1.5em;
    margin-right: 0.5em;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        # repo_id="Mykes/med_tinyllama_gguf",
        filename="*Q4_K_M.gguf",
        verbose=False,
        n_ctx=256,
        n_batch=128,
        n_threads=8,
        use_mlock=True,
        use_mmap=True,
        # n_ctx=256,    # Reduced context window
        # n_batch=8,    # Smaller batch size
        # n_threads=2,  # Adjust based on your CPU cores
        # use_mmap=True,
    )
llm = load_model()
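# Flatten the stored chat history into plain "Human:"/"Assistant:" turns,
# the prompt format used for the completion call further below.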
def format_context(messages):
context = ""
for message in messages:
if message["role"] == "user":
context += f"Human: {message['content']}\n"
else:
context += f"Assistant: {message['content']}\n"
return context
# Sidebar
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")
# # Add useful information to the sidebar
# st.sidebar.header("How to use")
# st.sidebar.markdown("""
# 1. Type your question in the chat input box at the bottom of the screen.
# 2. Press Enter or click the Send button to submit your question.
# 3. The AI will generate a response based on your input.
# 4. You can have a continuous conversation by asking follow-up questions.
# """)
st.sidebar.header("Model Information")
st.sidebar.markdown("""
- Model: med_phi3-mini-4k-GGUF
([View on Hugging Face](https://huggingface.co/Mykes/med_phi3-mini-4k-GGUF))
- Context Length: 256 tokens (as configured in this app)
- This model is specialized in medical knowledge.
- Russian language
""")
st.sidebar.header("Tips")
st.sidebar.markdown("""
- Be clear and specific in your questions.
- For medical queries, provide relevant details.
- Remember that this is an AI model and may not always be 100% accurate.
""")
# Main chat interface
st.title("russian medical phi3-mini")
# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []
# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])
# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Build the context from up to the last 3 earlier messages; the new prompt is
    # excluded here because it is appended explicitly to the model input below.
    context = format_context(st.session_state.messages[:-1][-3:])
    # Prepare the model input
    model_input = f"{context}Human: {prompt}\nAssistant:"
    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""
        # Stream tokens as they are generated; echo=True returns the prompt as well,
        # which is stripped off after the "Assistant:" marker once streaming finishes.
        for token in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            echo=True,
            stream=True
        ):
            full_response += token['choices'][0]['text']
            message_placeholder.markdown(full_response + "▌")
        # Remove the initial context and prompt from the response
        assistant_response = full_response.split("Assistant:")[-1].strip()
        message_placeholder.markdown(assistant_response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})
# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    st.rerun()
# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")
# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with β€οΈ using Streamlit and Llama.cpp") |