|
import streamlit as st |
|
import os |
|
from typing import List |
|
import time |
|
from pdf_utils import extract_text_from_file, split_text |
|
from chroma_db_utils import create_chroma_db |
|
from query_handler import handle_query |
|
|
|
def initialize_session_state():
    """Seed ``st.session_state`` with the keys the app reads later.

    Keys that already exist are left untouched, so values survive
    Streamlit's script re-runs. The keys and their initial values are:

    * ``messages`` -- empty list (chat history)
    * ``db`` -- ``None`` (no document indexed yet)
    * ``chunks`` -- empty list (text chunks of the current document)
    """
    # Factories instead of ready-made values, so every session gets its own
    # fresh list object rather than sharing one.
    default_factories = (
        ("messages", list),
        ("db", lambda: None),
        ("chunks", list),
    )
    for key, make_default in default_factories:
        if key not in st.session_state:
            st.session_state[key] = make_default()
|
|
|
def process_uploaded_file(uploaded_file) -> List[str]:
    """Extract the text of an uploaded file and split it into chunks.

    The upload is spooled to a uniquely named temporary file, handed to
    ``extract_text_from_file`` and the resulting text is passed through
    ``split_text``. The temporary file is always removed afterwards.

    Args:
        uploaded_file: Object exposing ``name`` (the client-side file name)
            and ``getbuffer()`` (the raw bytes), as returned by
            ``st.file_uploader``.

    Returns:
        The chunks produced by ``split_text``, or an empty list when no text
        could be extracted (an error is shown in the UI in that case).
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # The file name comes from the client and must not be used as a path:
    # writing to it directly could overwrite (and then delete) an existing
    # file in the working directory, and two sessions uploading the same name
    # would clobber each other. Only the extension is kept.
    # NOTE(review): presumably extract_text_from_file picks its parser from
    # the extension -- confirm against pdf_utils.
    suffix = os.path.splitext(os.path.basename(uploaded_file.name))[1]

    temp_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            temp_path = tmp.name
            tmp.write(uploaded_file.getbuffer())
        # The handle is closed here so the extractor can reopen the path on
        # every platform.

        extracted_text = extract_text_from_file(temp_path)
        if not extracted_text:
            st.error("No text could be extracted from the file.")
            return []

        return split_text(extracted_text)
    finally:
        # Clean up even when writing or extraction raised.
        if temp_path is not None and os.path.exists(temp_path):
            os.remove(temp_path)
|
|
|
def _ingest_uploaded_file(uploaded_file) -> None:
    """Index *uploaded_file* unless this exact upload was already indexed.

    Streamlit re-runs the whole script on every interaction while the
    uploader keeps returning the same file, so without this guard the
    document would be re-extracted and the vector DB rebuilt for every chat
    message. The identity of the last indexed upload is remembered under
    ``st.session_state.processed_file_key``.
    """
    # ``file_id`` is not present on every Streamlit version, hence getattr;
    # name and size serve as the fallback identity.
    file_key = (
        getattr(uploaded_file, "file_id", None),
        uploaded_file.name,
        uploaded_file.size,
    )

    if st.session_state.get("processed_file_key") != file_key:
        with st.spinner("Processing document..."):
            chunks = process_uploaded_file(uploaded_file)
            if chunks:
                st.session_state.chunks = chunks
                st.session_state.db = create_chroma_db(chunks)
                # Only mark the upload as done once indexing succeeded, so a
                # failed attempt is retried (and its error shown) next run.
                st.session_state.processed_file_key = file_key
        if not chunks:
            return

    st.success(f"Document processed! Created {len(st.session_state.chunks)} chunks.")

    # Greet the user once, when the chat history is still empty.
    if not st.session_state.messages:
        st.session_state.messages.append({
            "role": "system",
            "content": "I've processed your document. You can now ask questions about it!"
        })


def main():
    """Render the document Q&A page.

    The sidebar offers a file uploader; an uploaded document is chunked and
    indexed via ``create_chroma_db``. The main area replays the chat history
    stored in ``st.session_state.messages`` and answers a new question with
    ``handle_query`` against ``st.session_state.db``.
    """
    # NOTE(review): the leading characters of the title and of the chat
    # header look like mis-decoded emoji -- confirm the intended glyphs and
    # the file encoding. They are kept verbatim here.
    st.title("π Document Q&A System")

    initialize_session_state()

    with st.sidebar:
        st.header("Document Upload")
        uploaded_file = st.file_uploader(
            "Upload your document",
            type=['pdf', 'docx', 'txt'],
            help="Supported formats: PDF, DOCX, TXT"
        )

        if uploaded_file:
            _ingest_uploaded_file(uploaded_file)

    st.header("π¬ Chat")

    # Replay the conversation so far; it lives in session state because the
    # script starts from scratch on every re-run.
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.write(message["content"])

    prompt = st.chat_input("Ask a question about your document")
    if not prompt:
        return

    if st.session_state.db is None:
        st.error("Please upload a document first!")
        return

    st.session_state.messages.append({"role": "user", "content": prompt})

    with st.chat_message("user"):
        st.write(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Thinking..."):
            # UI boundary: any failure while answering is reported to the
            # user instead of crashing the page.
            try:
                response = handle_query(prompt, st.session_state.db)
                st.write(response)

                st.session_state.messages.append({
                    "role": "assistant",
                    "content": response
                })
            except Exception as e:
                st.error(f"Error generating response: {e}")
|
|
|
# Run the app only when this file is executed as a script (for example via
# ``streamlit run``), not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|