import os

import PyPDF2
import streamlit as st
from dotenv import load_dotenv
from htmlTemplate import css, bot_template, user_template
from langchain.chains import ConversationalRetrievalChain
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.embeddings.spacy_embeddings import SpacyEmbeddings
from langchain_community.llms import LlamaCpp
from langchain_openai import AzureOpenAIEmbeddings, ChatOpenAI, OpenAIEmbeddings
from sentence_transformers import SentenceTransformer, util

# Maximum accepted size of a single uploaded PDF, in bytes (200 MB).
MAX_FILE_SIZE_BYTES = 200 * 1024 * 1024


def main():
    """Streamlit entry point for the PDF Insights AI app.

    Renders the page chrome, accepts PDF uploads in the sidebar, runs the
    ingestion pipeline (extract text -> chunk -> vector store -> retrieval
    chain), and routes user questions to the conversation chain.

    Side effects: reads environment variables via ``load_dotenv()`` and
    stores the conversation chain and chat history in ``st.session_state``.

    NOTE(review): relies on ``prepare_docs``, ``get_text_chunks``,
    ``ingest_into_vectordb``, ``get_conversation_chain`` and
    ``handle_userinput`` defined elsewhere in this module.
    """
    load_dotenv()
    st.set_page_config(
        page_title="PDF Insights AI",
        page_icon=":books:",
        layout="wide",
    )
    st.write(css, unsafe_allow_html=True)

    # Welcome section
    st.title("📚 PDF Insights AI")
    st.markdown("""
    ### Unlock the Knowledge in Your PDFs
    - 🤖 AI-powered document analysis
    - 💬 Ask questions about your uploaded documents
    - 📄 Support for multiple PDF files
    """)

    # Initialize session state so reruns keep the chain and chat history.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # File upload section
    with st.sidebar:
        st.header("📤 Upload Documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here",
            type=['pdf'],
            accept_multiple_files=True,
            help="Upload PDF files to analyze. Max file size: 200MB",
        )

        # File validation: report every oversized file, then drop them.
        # (Fixes the original's list.remove() inside the iteration loop,
        # which mutates the list being iterated and can skip files.)
        if pdf_docs:
            for doc in pdf_docs:
                if doc.size > MAX_FILE_SIZE_BYTES:
                    st.error(f"File {doc.name} is too large. Maximum file size is 200MB.")
            pdf_docs = [doc for doc in pdf_docs if doc.size <= MAX_FILE_SIZE_BYTES]

        if st.button("Process Documents", type="primary"):
            if not pdf_docs:
                st.warning("Please upload at least one PDF file.")
            else:
                with st.spinner("Processing your documents..."):
                    try:
                        # get pdf text
                        content, metadata = prepare_docs(pdf_docs)
                        # get the text chunks
                        split_docs = get_text_chunks(content, metadata)
                        # create vector store
                        vectorstore = ingest_into_vectordb(split_docs)
                        # create conversation chain
                        st.session_state.conversation = get_conversation_chain(vectorstore)
                        st.success("Documents processed successfully! You can now ask questions.")
                    except Exception as e:
                        # Surface the failure to the user rather than crashing the app.
                        st.error(f"An error occurred while processing documents: {str(e)}")

    # Question input section
    user_question = st.text_input(
        "📝 Ask a question about your documents",
        placeholder="What insights can you provide from these documents?",
    )

    if user_question:
        if st.session_state.conversation is None:
            # Guard: no retrieval chain exists until documents are processed.
            st.warning("Please upload and process documents first.")
        else:
            handle_userinput(user_question)