File size: 12,130 Bytes
c917d47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
906493f
c917d47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
import streamlit as st
import asyncio
import sys
from pathlib import Path
import base64
import pandas as pd
from typing import Literal, Tuple, Optional
from wiki import render_wiki_tab
from search_handlers import run_global_search, run_local_search, run_drift_search
import auth


import graphrag.api as api
from graphrag.config import GraphRagConfig, load_config, resolve_paths
from graphrag.index.create_pipeline_config import create_pipeline_config
from graphrag.logging import PrintProgressReporter
from graphrag.utils.storage import _create_storage, _load_table_from_storage


# NOTE: emoji are spelled as Unicode escapes so they survive any re-encoding
# of this file; the previous literals had been corrupted into mojibake, which
# is not a valid emoji for ``page_icon`` or a meaningful avatar.
st.set_page_config(
    page_title="GraphRAG Chat Interface",
    page_icon="\U0001F50D",  # left-pointing magnifying glass
    layout="wide",
)

# Default avatars, defined once at module level.
DEFAULT_USER_AVATAR = "\U0001F464"  # bust in silhouette
DEFAULT_BOT_AVATAR = "\U0001F916"  # robot face

# Seed the per-session avatars only when the session has none yet, so a value
# already stored in the session is never overwritten on rerun.
if "user_avatar" not in st.session_state:
    st.session_state.user_avatar = DEFAULT_USER_AVATAR
if "bot_avatar" not in st.session_state:
    st.session_state.bot_avatar = DEFAULT_BOT_AVATAR

# Legacy names kept for any importer; they mirror the defaults above.
USER_AVATAR = DEFAULT_USER_AVATAR  # Default user emoji
BOT_AVATAR = DEFAULT_BOT_AVATAR  # Default bot emoji


class StreamlitProgressReporter(PrintProgressReporter):
    """Progress reporter that forwards success messages to a Streamlit placeholder.

    Only ``success`` is overridden here; every other reporter method keeps the
    behaviour inherited from ``PrintProgressReporter``.
    """

    def __init__(self, placeholder) -> None:
        """Store *placeholder*; the base reporter is initialised with ``""``."""
        super().__init__("")
        self.placeholder = placeholder

    def success(self, message: str) -> None:
        """Show *message* through the placeholder's ``success`` element."""
        target = self.placeholder
        target.success(message)


def _timestamped_message(role: str, content) -> dict:
    """Build a chat-history entry stamped with the current ``HH:MM`` time."""
    return {
        "role": role,
        "content": content,
        "timestamp": pd.Timestamp.now().strftime("%H:%M"),
    }


def render_chat_tab():
    """Render the Chat tab: history, last search context, and the query input.

    When a query is submitted, the user message is appended to
    ``st.session_state.messages``, the search selected by
    ``st.session_state.search_type`` is run ("global", "drift", anything else
    falls back to local), and either the response or an error message is
    appended as the assistant message. The script is then rerun so the
    history is redrawn with the new messages.
    """
    format_message_history()

    # The context of the latest successful search is kept in session state and
    # drawn here on every run. Rendering it only inside the submit branch (as
    # before) meant it was discarded by the st.rerun() that follows.
    last_context = st.session_state.get("last_search_context")
    if last_context is not None:
        with st.expander("View Search Context"):
            st.json(last_context)

    # Chat input
    if prompt := st.chat_input("Enter your query..."):
        # Add user message to history with timestamp
        st.session_state.messages.append(_timestamped_message("user", prompt))

        # All three handlers are called with the same keyword arguments, so
        # pick the callable once instead of repeating the call per branch.
        handlers = {
            "global": run_global_search,
            "drift": run_drift_search,
        }
        run_search = handlers.get(st.session_state.search_type, run_local_search)

        # Process query
        with st.spinner("Processing your query..."):
            response_placeholder = st.empty()
            try:
                response, context = run_search(
                    config_filepath=st.session_state.config_filepath,
                    data_dir=st.session_state.data_dir,
                    root_dir=st.session_state.root_dir,
                    community_level=st.session_state.community_level,
                    response_type=st.session_state.response_type,
                    streaming=st.session_state.streaming,
                    query=prompt,
                    progress_placeholder=response_placeholder,
                )
            except Exception as e:
                # UI boundary: surface the failure in the chat instead of
                # crashing the app, and drop any stale context from an
                # earlier query so it is not shown next to the error.
                st.session_state.last_search_context = None
                st.session_state.messages.append(
                    _timestamped_message(
                        "assistant", f"Error processing query: {str(e)}"
                    )
                )
            else:
                # Clear the placeholder before adding the final response
                response_placeholder.empty()

                # Add assistant response to history with timestamp
                st.session_state.messages.append(
                    _timestamped_message("assistant", response)
                )

                # Persist the context so it is shown after the rerun below.
                st.session_state.last_search_context = context

        st.rerun()


def display_message(msg: str, is_user: bool = False, timestamp: str = "") -> None:
    """Display a chat message with avatar and consistent formatting.

    Args:
        msg: Message text to show inside the bubble.
        is_user: ``True`` for a user message (user avatar and CSS class),
            ``False`` for an assistant message.
        timestamp: Text shown under the bubble; may be empty.

    The message is emitted as raw HTML via ``st.markdown`` with
    ``unsafe_allow_html=True``.
    """
    import html  # local import: only needed for escaping user text

    message_class = "user-message" if is_user else "assistant-message"
    avatar = st.session_state.user_avatar if is_user else st.session_state.bot_avatar

    # User text is free-form input: escape markup characters so that typing
    # e.g. "</div>" or "<b>" is shown literally instead of altering the page.
    # Assistant content is inserted verbatim, exactly as before.
    if is_user:
        msg = html.escape(str(msg), quote=False)

    message_container = f"""
        <div class="chat-message {message_class}">
            <div class="avatar">
                <div style="font-size: 25px; text-align: center;">{avatar}</div>
            </div>
            <div class="message-content-wrapper">
                <div class="message-bubble">
                    <div class="message-content">
                        {msg}
                    </div>
                </div>
                <div class="timestamp">{timestamp}</div>
            </div>
        </div>
    """
    st.markdown(message_container, unsafe_allow_html=True)


def format_message_history() -> None:
    """Render every message stored in ``st.session_state.messages``.

    An opening ``chat-container`` div is emitted before the messages and a
    closing ``</div>`` after them; each entry is drawn by ``display_message``.
    A missing ``"timestamp"`` key is rendered as an empty string.
    """
    st.markdown('<div class="chat-container">', unsafe_allow_html=True)

    for entry in st.session_state.messages:
        sent_by_user = entry["role"] == "user"
        display_message(
            msg=entry["content"],
            is_user=sent_by_user,
            timestamp=entry.get("timestamp", ""),
        )

    st.markdown("</div>", unsafe_allow_html=True)


@st.cache_resource
def load_css() -> str:
    """Return the text of ``styles.css`` (resolved against the working directory).

    The file is decoded explicitly as UTF-8 so the result does not depend on
    the platform's default locale encoding. The result is cached by
    ``st.cache_resource``.

    Raises:
        OSError: If ``styles.css`` cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    return Path("styles.css").read_text(encoding="utf-8")


def initialize_session_state():
    """Give every expected session-state key a default if it is not set yet.

    Keys that already exist in ``st.session_state`` are left untouched.
    """
    # Built on each call so the ``messages`` default is always a fresh list.
    defaults = {
        "messages": [],
        "response_placeholder": None,
        "config_filepath": None,
        "data_dir": None,
        "root_dir": ".",
        "community_level": 2,
        "response_type": "concise",
        "search_type": "global",
        "streaming": True,
        "authenticated": False,
    }
    for key, value in defaults.items():
        if key not in st.session_state:
            st.session_state[key] = value


def _render_sidebar() -> None:
    """Render the sidebar: logos, search configuration widgets and logout."""
    with st.sidebar:
        # Display logos side by side at the top of the sidebar
        col1, col2 = st.columns(2)
        with col1:
            st.markdown(
                '<div class="logo-container"><img class="logo-image" src="https://nexttech.pwc.co.il/wp-content/uploads/2023/12/image-2.png"></div>',
                unsafe_allow_html=True,
            )
        with col2:
            st.markdown(
                '<div class="logo-container"><img class="logo-image" src="https://nexttech.pwc.co.il/wp-content/uploads/2023/12/Frame.png"></div>',
                unsafe_allow_html=True,
            )

        st.header("Configuration")
        st.session_state.community_level = st.number_input(
            "Community Level",
            min_value=0,
            max_value=10,
            value=st.session_state.community_level,
            help="Controls the granularity of the search...",
        )

        # Reserve the Response Type position above Search Type, but fill it
        # only after the search type for THIS run is known. Deciding from the
        # value stored by the previous run made the widget appear/disappear
        # one interaction late.
        response_type_slot = st.container()

        search_types = ["global", "local", "drift"]
        current_search_type = st.session_state.search_type
        st.session_state.search_type = st.selectbox(
            "Search Type",
            options=search_types,
            # Unknown values fall back to the last option, as before.
            index=(
                search_types.index(current_search_type)
                if current_search_type in search_types
                else 2
            ),
            help="""Search Types:
                - Local Search: "Focuses on finding specific information by searching through direct connections in the knowledge graph. Best for precise, fact-based queries."
                - Global Search: "Analyzes the entire document collection at a high level using community summaries. Best for understanding broad themes and general policies."
                - DRIFT Search: "Combines local and global search capabilities, dynamically exploring connections while gathering detailed information. Best for complex queries requiring both specific details and broader context."
                """,
        )

        if st.session_state.search_type != "drift":
            # Only show response type for global and local search
            with response_type_slot:
                st.session_state.response_type = st.selectbox(
                    "Response Type",
                    options=["concise", "detailed"],
                    index=0 if st.session_state.response_type == "concise" else 1,
                    help="Style of response generation",
                )

            # Show streaming option only for supported search types
            st.session_state.streaming = st.checkbox(
                "Enable Streaming",
                value=st.session_state.streaming,
                help="Stream response tokens as they're generated",
            )
        else:
            st.session_state.streaming = False
            st.info("Streaming is not available for DRIFT search")

        # logout button
        if st.button("Logout"):
            st.session_state.clear()  # Clear all session state data
            initialize_session_state()  # Reinitialize the session state
            # Set the parameter through the query-params object. Assigning a
            # dict to ``st.query_params`` would only rebind the attribute on
            # the streamlit module and never reach the browser URL.
            st.query_params["restart"] = "true"
            st.rerun()


def _render_footer() -> None:
    """Render the author/links footer at the bottom of the sidebar."""
    st.sidebar.markdown(
        """
            <div style="position: absolute; bottom: 0; width: 100%; text-align: center; font-size: 14px; margin-bottom: -200px;">
                Liran Baba | 
                <a href="https://linkedin.com/in/liranba" target="_blank">LinkedIn</a> | 
                <a href="https://huggingface.co/CordwainerSmith" target="_blank">HuggingFace</a>
            </div>
            """,
        unsafe_allow_html=True,
    )


def main():
    """Entry point: gate on authentication, then draw the whole application.

    Unauthenticated sessions run ``auth.check_credentials()``; on success the
    session is marked authenticated and the script is rerun, otherwise the
    script is stopped. Authenticated sessions get the title, CSS, sidebar,
    the two tabs (documentation and chat) and the sidebar footer.
    """
    initialize_session_state()

    # Authentication check. Both st.rerun() and st.stop() end the current
    # script run, so everything below executes only for authenticated sessions.
    if not st.session_state.authenticated:
        if auth.check_credentials():
            st.session_state.authenticated = True
            st.rerun()  # Rerun to reflect the authentication state
        else:
            st.stop()  # Stop further execution if authentication fails

    # Main application content
    st.title("PWC Home Assignment #1, Graphrag")

    css = load_css()
    st.markdown(f"<style>{css}</style>", unsafe_allow_html=True)

    # Sidebar configuration
    _render_sidebar()

    # Create tabs
    tab1, tab2 = st.tabs(["Assignment Documentation", "Chat"])

    # readme tab content
    with tab1:
        render_wiki_tab()

    # Chat tab content
    with tab2:
        render_chat_tab()

    _render_footer()


# Build the app only when this file is executed as the main script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()