import os
from getpass import getpass

import gradio as gr
from pinecone.grpc import PineconeGRPC
from pinecone import ServerlessSpec
from llama_index.vector_stores import PineconeVectorStore
from llama_index import VectorStoreIndex
from llama_index.retrievers import VectorIndexRetriever
from llama_index.node_parser import SemanticSplitterNodeParser
from llama_index.embeddings import OpenAIEmbedding
from llama_index.ingestion import IngestionPipeline
from llama_index.query_engine import RetrieverQueryEngine

pinecone_api_key = os.getenv("PINECONE_API_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")

# This will be the model we use both for Node parsing and for vectorization
embed_model = OpenAIEmbedding(api_key=openai_api_key)

# Ingestion pipeline: semantic chunking first, then embedding of each node.
pipeline = IngestionPipeline(
    transformations=[
        SemanticSplitterNodeParser(
            buffer_size=1,
            breakpoint_percentile_threshold=95,
            embed_model=embed_model,
        ),
        embed_model,
    ],
)

# Initialize connection to Pinecone and attach to the existing index.
pc = PineconeGRPC(api_key=pinecone_api_key)
index_name = "anualreport"
pinecone_index = pc.Index(index_name)

# Wrap the Pinecone index as a LlamaIndex vector store.
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
pinecone_index.describe_index_stats()

# Due to how LlamaIndex works here, if your Open AI API key was
# not set to an environment variable before, you have to set it at this point
if not os.getenv('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = openai_api_key

# Instantiate VectorStoreIndex object from our vector_store object
vector_index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# Grab 5 search results
retriever = VectorIndexRetriever(index=vector_index, similarity_top_k=5)

# Pass in your retriever from above, which is configured to return the top 5 results
query_engine = RetrieverQueryEngine(retriever=retriever)


def query_annual_report(summary_request):
    """Run a natural-language query against the annual-report index.

    Args:
        summary_request: Free-text question/request entered by the user.

    Returns:
        The text of the LLM-synthesized answer (``Response.response``).
    """
    # BUGFIX: the original had `llm_query = ` dangling with its right-hand
    # side on the next physical line — a syntax error. Joined into one
    # statement here.
    llm_query = query_engine.query(summary_request)
    return llm_query.response


# Create the Gradio interface
iface = gr.Interface(
    fn=query_annual_report,
    inputs="text",
    outputs="text",
    title="Annual Report Summary Query",
    description="Enter your query to get the summary of the annual report.",
)

# Launch only when run as a script, so importing this module for reuse
# (e.g. to call query_annual_report directly) does not start the server.
if __name__ == "__main__":
    iface.launch()