File size: 4,828 Bytes
9c7e22f
0ffcc97
 
58cca6a
70e229a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af81e42
 
 
 
 
 
 
 
 
 
 
70e229a
 
 
5fc4fcc
 
 
bad6434
 
9334c42
5fc4fcc
f6b26e3
 
a13e583
 
4cc0b38
a13e583
 
 
c90d5be
5fc4fcc
 
 
29d374a
7fa4911
 
 
 
 
 
 
 
f8496a1
2d8c474
 
 
 
80460d6
 
 
 
 
 
 
 
2d8c474
 
 
54f5785
 
2d8c474
 
80460d6
2d8c474
 
 
 
 
 
 
 
 
 
04fdf40
2d8c474
70e229a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7fa4911
a13e583
7fa4911
 
 
53635ec
70e229a
 
 
 
53635ec
70e229a
 
 
7fa4911
70e229a
 
 
 
 
7fa4911
 
 
 
70e229a
 
7fa4911
f8496a1
2d8c474
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import streamlit as st
from llama_cpp import Llama

# Browser-tab title/icon and wide layout for the whole app.
# The icon is a real emoji; the previous value was a mis-decoded byte sequence.
st.set_page_config(page_title="russian medical phi3-mini", page_icon="🤖", layout="wide")

# Custom CSS for the text input and for elements carrying the `chat-message`
# classes. It is injected as raw HTML, hence unsafe_allow_html=True.
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.chat-message {
    padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
    background-color: #2b313e
}
.chat-message.bot {
    background-color: #475063
}
.chat-message .avatar {
  width: 20%;
}
.chat-message .avatar img {
  max-width: 78px;
  max-height: 78px;
  border-radius: 50%;
  object-fit: cover;
}
.chat-message .message {
  width: 80%;
  padding: 0 1.5rem;
  color: #fff;
}
.chat-message.user::before {
    content: "👤";
    font-size: 1.5em;
    margin-right: 0.5em;
}

.chat-message.bot::before {
    content: "🤖";
    font-size: 1.5em;
    margin-right: 0.5em;
}
</style>
""", unsafe_allow_html=True)

@st.cache_resource
def load_model():
    """Load the Q4_K_M GGUF build of ``Mykes/med_phi3-mini-4k-GGUF``.

    The function is wrapped in ``st.cache_resource`` so the model object is
    meant to be created once and reused instead of being reloaded every time
    the script runs.

    Returns:
        The ``Llama`` instance produced by ``Llama.from_pretrained``.
    """
    # Runtime settings passed straight through to llama.cpp.
    # NOTE(review): "Mykes/med_tinyllama_gguf" was previously tried as an
    # alternative repo_id - confirm whether it is still of interest.
    runtime_options = {
        "verbose": False,
        "n_ctx": 256,       # context window size, in tokens
        "n_batch": 128,
        "n_threads": 8,
        "use_mlock": True,
        "use_mmap": True,
    }
    return Llama.from_pretrained(
        repo_id="Mykes/med_phi3-mini-4k-GGUF",
        filename="*Q4_K_M.gguf",
        **runtime_options,
    )


# Module-level handle to the model used by the chat loop.
llm = load_model()

def format_context(messages):
    """Render chat messages as a plain-text transcript.

    Args:
        messages: Iterable of dicts, each with a ``"role"`` and a
            ``"content"`` key.

    Returns:
        One line per message, each terminated by a newline:
        ``Human: <content>`` when the role is ``"user"`` and
        ``Assistant: <content>`` for every other role. An empty input
        yields an empty string.
    """
    transcript_lines = (
        f"{'Human' if entry['role'] == 'user' else 'Assistant'}: {entry['content']}\n"
        for entry in messages
    )
    return "".join(transcript_lines)

# Sidebar: static description of the app and the model.
st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")

# Disabled "How to use" section, kept for reference:
# st.sidebar.header("How to use")
# st.sidebar.markdown("""
# 1. Type your question in the chat input box at the bottom of the screen.
# 2. Press Enter or click the Send button to submit your question.
# 3. The AI will generate a response based on your input.
# 4. You can have a continuous conversation by asking follow-up questions.
# """)

# The context length shown here must match the n_ctx value the model is
# loaded with (256); it previously advertised 512.
st.sidebar.header("Model Information")
st.sidebar.markdown("""
- Model: med_phi3-mini-4k-GGUF 
  ([View on Hugging Face](https://huggingface.co/Mykes/med_phi3-mini-4k-GGUF))
- Context Length: 256 tokens
- This model is specialized in medical knowledge.
- Russian language
""")

st.sidebar.header("Tips")
st.sidebar.markdown("""
- Be clear and specific in your questions.
- For medical queries, provide relevant details.
- Remember that this is an AI model and may not always be 100% accurate.
""")

# Main chat interface
st.title("russian medical phi3-mini")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Format the context with the last 3 messages. The new prompt was just
    # appended, so it is already the final "Human:" line of the context.
    context = format_context(st.session_state.messages[-3:])

    # Prepare the model input. The prompt must not be appended a second time
    # here: doing so sent the question to the model twice and wasted part of
    # the small context window.
    model_input = f"{context}Assistant:"

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # echo=False keeps the prompt transcript out of the streamed text, so
        # only the generated answer is ever shown or stored.
        for token in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            echo=False,
            stream=True
        ):
            full_response += token['choices'][0]['text']
            # Trailing block character acts as a typing cursor while streaming
            message_placeholder.markdown(full_response + "▌")

        # Final render without the cursor
        assistant_response = full_response.strip()
        message_placeholder.markdown(assistant_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

# Add a button to clear the chat history
if st.sidebar.button("Clear Chat History"):
    st.session_state.messages = []
    # st.experimental_rerun is deprecated in favour of st.rerun; prefer the
    # new name and fall back only on Streamlit releases that lack it.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()

# Display the number of messages in the current conversation
st.sidebar.markdown(f"Current conversation length: {len(st.session_state.messages)} messages")

# Add a footer
st.sidebar.markdown("---")
st.sidebar.markdown("Created with ❤️ using Streamlit and Llama.cpp")