Spaces:

ababio
/

pdfReport

Sleeping

File size: 2,669 Bytes

9eeafb7
8c67ed3
 
95d85ed
 
9eeafb7
 
ff02082
63614ef
 
 
056775b
e6f156e
8c67ed3
 
 
e6f156e
8c67ed3
e6f156e
63614ef
 
 
 
95d85ed
8c67ed3
63614ef
9eeafb7
63614ef
9eeafb7
 
 
 
 
 
8c67ed3
9eeafb7
8c67ed3
 
63614ef
 
9eeafb7
 
 
 
63614ef
8c67ed3
 
 
 
 
 
 
 
 
 
63614ef
8c67ed3

import html
import os

import streamlit as st
from dotenv import load_dotenv
from llama_index import VectorStoreIndex
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever
from llama_index.vector_stores import PineconeVectorStore
from pinecone import ServerlessSpec
from pinecone.grpc import PineconeGRPC

from htmlTemplates import css, bot_template, user_template

# Load environment variables.
# load_dotenv() copies entries from a local .env file (if one exists) into the
# process environment; the three settings below are then read from it.
load_dotenv()
pinecone_api_key = os.getenv("PINECONE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
index_name = os.getenv("INDEX_NAME")

# Fail fast with an actionable message. Without this check an unset or empty
# setting is handed to the client libraries below as None/"" and only shows up
# as a less obvious error from inside them.
_missing_settings = [
    name
    for name, value in (
        ("PINECONE_API_KEY", pinecone_api_key),
        ("OPENAI_API_KEY", openai_api_key),
        ("INDEX_NAME", index_name),
    )
    if not value
]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
    )

# Initialize OpenAI embedding model
embed_model = OpenAIEmbedding(api_key=openai_api_key)

# Initialize connection to Pinecone and wrap the existing index so that
# LlamaIndex can use it as a vector store.
pinecone_client = PineconeGRPC(api_key=pinecone_api_key)
pinecone_index = pinecone_client.Index(index_name)
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# Define the ingestion pipeline: semantic splitting followed by embedding.
# NOTE(review): nothing in the visible code runs this pipeline or attaches the
# vector store to it - confirm whether ingestion happens elsewhere.
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)

# Initialize LlamaIndex query components on top of the Pinecone-backed store.
# NOTE(review): `embed_model` is not passed to the index here, so query-time
# embedding uses whatever LlamaIndex is configured with by default - confirm
# that this matches the model used at ingestion time.
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)
query_engine = RetrieverQueryEngine(retriever=retriever)

# Function to handle user input and return the query response
def handle_userinput(user_question):
    """Answer ``user_question`` and render the full chat transcript.

    If a conversation callable is stored in ``st.session_state.conversation``
    it is invoked with ``{'question': user_question}`` and the
    ``'chat_history'`` entry of its result replaces the stored history.
    If no conversation is configured (the slot holds ``None``), the question
    is sent to the module-level ``query_engine`` instead and the
    question/answer pair is appended to the stored history as plain strings.

    Entries at even positions are rendered with ``user_template`` and entries
    at odd positions with ``bot_template``.

    Args:
        user_question: The text entered by the user.
    """
    conversation = st.session_state.conversation
    if conversation is not None:
        response = conversation({'question': user_question})
        st.session_state.chat_history = response['chat_history']
    else:
        # Calling the None placeholder would raise TypeError; fall back to the
        # retrieval query engine so the question still gets answered.
        answer = query_engine.query(user_question)
        transcript = list(st.session_state.chat_history or [])
        transcript += [user_question, str(answer)]
        st.session_state.chat_history = transcript

    for position, message in enumerate(st.session_state.chat_history):
        # History entries are either message objects exposing `.content`
        # (conversation path) or plain strings (query-engine path).
        raw_text = str(getattr(message, "content", message))
        # The templates are rendered with unsafe_allow_html=True, so escape the
        # text to keep user- or document-supplied markup from being injected.
        safe_text = html.escape(raw_text)
        template = user_template if position % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", safe_text), unsafe_allow_html=True)

# Main function to run the Streamlit app
def main():
    """Set up the Streamlit page and pass a submitted question to the handler.

    Loads the .env file, applies the page configuration and CSS, makes sure
    the ``conversation`` and ``chat_history`` session slots exist (seeded
    with ``None``), then shows the question box. A non-empty question is
    forwarded to ``handle_userinput``.
    """
    load_dotenv()
    st.set_page_config(page_title="Chat with Annual Reports", page_icon=":books:")
    st.write(css, unsafe_allow_html=True)

    # Seed the per-session slots once; existing values are left untouched.
    for slot in ("conversation", "chat_history"):
        if slot not in st.session_state:
            st.session_state[slot] = None

    st.header("Chat with Annual Report Documents")
    user_question = st.text_input("Ask a question about your documents:")
    if not user_question:
        # Nothing submitted yet - just show the empty page.
        return
    handle_userinput(user_question)

if __name__ == "__main__":
    main()