File size: 2,959 Bytes
6d70f5d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import streamlit as st
from llama_cpp import Llama

# Browser-tab title and icon for the app.
# The icon is the robot-face emoji (U+1F916). It is written as a named escape
# so the source stays ASCII-only: the previous literal was the emoji's UTF-8
# bytes decoded with the wrong codec (mojibake), which rendered as garbage.
st.set_page_config(page_title="Chat with AI", page_icon="\N{ROBOT FACE}")

# Custom CSS for better styling.
# Injected as raw HTML, hence unsafe_allow_html=True; the string is static and
# contains no user input.
# NOTE(review): nothing visible in this file emits elements carrying the
# `.chat-message` classes -- confirm whether those rules are still needed.
st.markdown("""
<style>
.stTextInput > div > div > input {
    background-color: #f0f2f6;
}
.chat-message {
    padding: 1.5rem; border-radius: 0.5rem; margin-bottom: 1rem; display: flex
}
.chat-message.user {
    background-color: #2b313e
}
.chat-message.bot {
    background-color: #475063
}
.chat-message .avatar {
  width: 20%;
}
.chat-message .avatar img {
  max-width: 78px;
  max-height: 78px;
  border-radius: 50%;
  object-fit: cover;
}
.chat-message .message {
  width: 80%;
  padding: 0 1.5rem;
  color: #fff;
}
</style>
""", unsafe_allow_html=True)

@st.cache_resource
def load_model():
    """Create the llama.cpp model that answers chat prompts.

    The model is requested through ``Llama.from_pretrained`` with a fixed
    repository id and a filename glob selecting the Q4_K_M GGUF file
    (presumably resolved against the Hugging Face Hub -- confirm). The
    context window, batch size and thread count are hard-coded below.

    Returns:
        The object returned by ``Llama.from_pretrained``.
    """
    model_options = {
        "repo_id": "Mykes/med_phi3-mini-4k-GGUF",
        "filename": "*Q4_K_M.gguf",
        "verbose": False,
        "n_ctx": 512,
        "n_batch": 256,
        "n_threads": 4,
    }
    return Llama.from_pretrained(**model_options)


# Module-level handle to the model; the chat loop calls it for completions.
llm = load_model()

def format_context(messages):
    """Render chat messages as a plain-text transcript.

    Args:
        messages: Iterable of dicts with ``"role"`` and ``"content"`` keys.

    Returns:
        One newline-terminated line per message, concatenated in order.
        A message whose role is ``"user"`` is prefixed with ``Human:``;
        every other role is prefixed with ``Assistant:``. An empty input
        yields an empty string.
    """
    transcript_lines = []
    for entry in messages:
        speaker = "Human" if entry["role"] == "user" else "Assistant"
        transcript_lines.append(f"{speaker}: {entry['content']}\n")
    return "".join(transcript_lines)

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Format the context with the last 5 messages. The new user turn was just
    # appended, so the transcript already ends with "Human: <prompt>\n".
    context = format_context(st.session_state.messages[-5:])

    # Prepare the model input: only the assistant cue is added. Appending the
    # prompt again here would feed the same question to the model twice.
    model_input = f"{context}Assistant:"

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # NOTE(review): with max_tokens=None the reply length is presumably
        # limited only by the model's context window -- confirm against the
        # n_ctx value used when the model is loaded.
        for chunk in llm(
            model_input,
            max_tokens=None,
            stop=["Human:", "<end_of_turn>"],
            # Request only newly generated text, so the prompt never has to be
            # stripped back out of the reply.
            echo=False,
            stream=True,
        ):
            full_response += chunk["choices"][0]["text"]
            # Trailing block character acts as a typing cursor while the reply
            # streams in (written as an escape to keep the source ASCII-only).
            message_placeholder.markdown(full_response + "\N{LEFT HALF BLOCK}")

        # The accumulated text is the reply itself. It is not split on
        # "Assistant:", which would cut off any answer containing that word.
        assistant_response = full_response.strip()
        message_placeholder.markdown(assistant_response)

    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": assistant_response})

st.sidebar.title("Chat with AI")
st.sidebar.markdown("This is a simple chat interface using Streamlit and an AI model.")