"""Gradio app: crawl software documentation with FireCrawl, index it into a
Qdrant collection via LlamaIndex, and chat over it with Gemini."""

import os
import time

import dotenv
import gradio as gr
import qdrant_client
from llama_index.core import Settings, StorageContext, VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.embeddings.fastembed import FastEmbedEmbedding
from llama_index.llms.gemini import Gemini
from llama_index.readers.web import FireCrawlWebReader
from llama_index.vector_stores.qdrant import QdrantVectorStore

dotenv.load_dotenv()

# Module-level state shared between the Setup and Chat tabs.
index = None
chat_engine = None
collection_name = ""

# Seconds to wait before the single retry of a failed LLM call.
# NOTE(review): 120s looks like a rate-limit back-off — confirm against the
# Gemini quota actually in use.
RETRY_DELAY_SECONDS = 120

SYSTEM_PROMPT = (
    """You are an AI assistant for developers, specializing in technical documentation. Your task is to provide accurate, concise, and helpful responses based on the given documentation context. 
Context information is below: {context_str} Always answer based on the information in the context and general knowledge and be precise Given this context, please respond to the following user query: {query_str} Your response should: Directly address the query using information from the context Include relevant code examples or direct quotes if applicable Mention specific sections or pages of the documentation Highlight any best practices or potential pitfalls related to the query After your response, suggest 3 follow-up questions based on the context that the user might find helpful for deeper understanding. ALWAYS SUGGEST FOLLOW UP QUESTIONS Your response:"""
)


def embed_setup():
    """Configure the process-global LlamaIndex embedding model and LLM."""
    Settings.embed_model = FastEmbedEmbedding(model_name="BAAI/bge-small-en-v1.5")
    Settings.llm = Gemini(temperature=0.1, model_name="models/gemini-pro")


def qdrant_setup():
    """Return a Qdrant client built from QDRANT_URL / QDRANT_API_KEY env vars."""
    return qdrant_client.QdrantClient(
        os.getenv("QDRANT_URL"),
        api_key=os.getenv("QDRANT_API_KEY"),
    )


def ingest_documents(url):
    """Scrape ``url`` with FireCrawl and return the resulting documents."""
    firecrawl_reader = FireCrawlWebReader(
        api_key=os.getenv("FIRECRAWL_API_KEY"),
        mode="scrape",
    )
    return firecrawl_reader.load_data(url=url)


def setup_query_engine(url, coll_name):
    """Build the global index and chat engine backed by a Qdrant collection.

    If ``url`` is given, the page is crawled and indexed into the collection;
    otherwise an index is opened over the existing collection contents.
    Returns a status string for the Gradio output box.
    """
    global index, chat_engine, collection_name

    # The UI marks the collection name as compulsory — enforce it here instead
    # of failing deep inside the vector-store client.
    if not coll_name:
        return "Please enter a collection name (it is compulsory)."

    collection_name = coll_name
    embed_setup()
    client = qdrant_setup()
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)

    if url:
        documents = ingest_documents(url)
        # The storage context already carries the vector store, so passing
        # vector_store= a second time (as the original did) was redundant.
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
    else:
        # BUG FIX: from_vector_store() creates its own storage context and
        # forwards extra kwargs to the index constructor; also passing
        # storage_context= raised a duplicate-keyword TypeError.
        index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt=SYSTEM_PROMPT,
    )
    return "Query engine setup completed successfully!"


def query_documentation(query):
    """Answer ``query`` with the chat engine, retrying once after a delay.

    Returns the model's response text, or an error message suitable for
    display in the chat window.
    """
    if not chat_engine:
        return "Please set up the query engine first."

    last_error = None
    for attempt in range(2):
        try:
            response = chat_engine.chat(query)
            return str(response.response)
        except Exception as exc:  # surface any backend failure to the UI
            last_error = exc
            if attempt == 0:
                # First failure: wait (likely rate limiting) and retry once.
                time.sleep(RETRY_DELAY_SECONDS)
    return f"Retry failed. \nError: {last_error}"


# Gradio interface: a Setup tab to (re)build the engine and a Chat tab.
with gr.Blocks() as app:
    gr.Markdown("# Talk to Software Documentation")

    with gr.Tab("Setup"):
        url_input = gr.Textbox(label="Enter URL to crawl and ingest documents (optional)")
        collection_input = gr.Textbox(label="Enter collection name for vector store (compulsory)")
        setup_button = gr.Button("Setup Query Engine")
        setup_output = gr.Textbox(label="Setup Output")
        setup_button.click(
            setup_query_engine,
            inputs=[url_input, collection_input],
            outputs=setup_output,
        )

    with gr.Tab("Chat"):
        chatbot = gr.Chatbot()
        msg = gr.Textbox(label="Enter your query")
        clear = gr.Button("Clear")

        def user(user_message, history):
            # Echo the user's message into the history; clear the input box.
            return "", history + [[user_message, None]]

        def bot(history):
            # Fill in the bot's half of the newest history entry.
            user_message = history[-1][0]
            history[-1][1] = query_documentation(user_message)
            return history

        msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
            bot, chatbot, chatbot
        )
        clear.click(lambda: None, None, chatbot, queue=False)

if __name__ == "__main__":
    app.launch()