Spaces:

nde-dilan
/

nerala_ai_backend

Runtime error

App Files Files Community

Nde Dilan committed on Mar 7

Commit

d4e21df

1 Parent(s): a5363fd

Add application file

Browse files

Files changed (1) hide show

streamlit.py +130 -0

streamlit.py ADDED Viewed

	@@ -0,0 +1,130 @@

+import streamlit as st
+import os
+from pathlib import Path
+import time
+from main import PDFProcessor, SecurityException
+# Configure page
+st.set_page_config(
+    page_title="PDF Query Engine",
+    page_icon="📚",
+    layout="wide",
+)
+# Initialize processor
+@st.cache_resource
+def get_processor():
+    return PDFProcessor()
+processor = get_processor()
+# Create upload directory if it doesn't exist (uploaded PDFs are written into it below)
+upload_dir = Path("./uploads")
+upload_dir.mkdir(exist_ok=True)  # exist_ok=True: no error when the folder is already present
+# Title and description
+st.title("PDF Query Engine 🔍")
+st.markdown("""
+This application allows you to extract information from PDF documents using natural language queries.
+Upload a PDF, wait for it to be processed, then ask questions about its content!
+""")
+# Sidebar
+with st.sidebar:
+    st.header("About")
+    st.info("""
+    This tool uses natural language processing to extract and query information from PDFs.
+    **Features:**
+    - Extract text from PDFs
+    - Process into semantic chunks
+    - Query using natural language
+    - Get relevant context from the document
+    """)
+    st.header("Instructions")
+    st.markdown("""
+    1. Upload a PDF file (max 26MB)
+    2. Wait for processing to complete
+    3. Type your question in the query box
+    4. Review the results
+    """)
+# File uploader
+uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
+# Process the uploaded file
+if uploaded_file is not None:
+    # Save the uploaded file temporarily
+    temp_file_path = os.path.join(upload_dir, uploaded_file.name)
+    with open(temp_file_path, "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    # Check if file has already been processed
+    file_hash = processor.get_file_hash(temp_file_path)
+    persist_directory = os.path.join(processor.config["db_directory"], file_hash)
+    already_processed = os.path.exists(persist_directory)
+    # Display file info
+    col1, col2 = st.columns(2)
+    with col1:
+        st.success(f"File uploaded: {uploaded_file.name}")
+        # Show file size
+        file_size = os.path.getsize(temp_file_path) / (1024 * 1024)  # Convert to MB
+        st.info(f"File size: {file_size:.2f} MB")
+    with col2:
+        if already_processed:
+            st.info("This file has already been processed and is ready for querying.")
+            process_button = st.button("Re-process file")
+        else:
+            st.warning("This file needs to be processed before querying.")
+            process_button = st.button("Process file")
+    # Process the file when button is clicked
+    if process_button:
+        try:
+            with st.spinner("Processing PDF... This may take a minute."):
+                # Process file
+                vector_store = processor.process_file(temp_file_path)
+                if vector_store:
+                    st.success("PDF processed successfully! You can now query the document.")
+                else:
+                    st.error("Failed to process PDF. The file might be empty or corrupted.")
+        except SecurityException as e:
+            st.error(f"Security error: {str(e)}")
+        except Exception as e:
+            st.error(f"Error processing file: {str(e)}")
+    # Query interface
+    st.header("Ask questions about the document")
+    # Check if the document can be queried
+    can_query = os.path.exists(persist_directory)
+    if can_query:
+        query = st.text_input("Enter your question:")
+        k_value = st.slider("Number of results to return", min_value=1, max_value=10, value=3)
+        if st.button("Search") and query:
+            with st.spinner("Searching for answers..."):
+                try:
+                    results = processor.query_document(temp_file_path, query, k=k_value)
+                    if not results:
+                        st.info("No relevant information found. Try rephrasing your question.")
+                    else:
+                        st.subheader("Search Results")
+                        for i, doc in enumerate(results):
+                            with st.expander(f"Result {i+1}"):
+                                st.markdown(doc.page_content)
+                except Exception as e:
+                    st.error(f"Error during query: {str(e)}")
+    else:
+        st.info("Please process the document before querying.")
+# Add footer
+st.markdown("---")
+st.markdown("PDF Query Engine | Built with Streamlit and LangChain")