from llama_index.core import Settings, VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
import qdrant_client
import gradio as gr
import os


def embed_setup():
    # Register the Gemini embedding model and a low-temperature LLM as the global defaults
    Settings.embed_model = GeminiEmbedding(
        api_key=os.getenv("GEMINI_API_KEY"), model_name="models/embedding-001"
    )
    Settings.llm = Gemini(
        api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.1,
        model_name="models/gemini-pro",
    )


def qdrant_setup():
    # Connect to the hosted Qdrant instance using credentials from the environment
    client = qdrant_client.QdrantClient(
        os.getenv("QDRANT_URL"),
        api_key=os.getenv("QDRANT_API_KEY"),
    )
    return client


def llm_setup():
    # A slightly more creative LLM used by the chat engine
    llm = Gemini(
        api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.6,
        model_name="models/gemini-pro",
    )
    return llm


def query_index(index, llm, similarity_top_k=3):
    # Build a context chat engine with a rolling memory buffer and a custom system prompt
    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        llm=llm,
        memory=memory,
        similarity_top_k=similarity_top_k,
        system_prompt=(
            """You are an AI assistant named Buildspace_Bot, created by Buildspace. Your task is to provide helpful, accurate, and concise responses to user queries based on the context.

welcome to buildspace. a home for ppl bringing their wildest ideas to life. it all begins online. on nights and weekends. this is where you will take any idea you’re excited about, figure out how to bring it to life, and turn it into something that people give a shit about — alongside thousands of others.

Context information is below:
----------------
{context_str}
----------------
Always answer based on the information in the context and be precise.
Given this context, please respond to the following user query:
{query_str}
Also suggest 3 more questions based on the context that the user can ask.
Your response:"""
        ),
    )
    return chat_engine


def get_response(text, history=None):
    # Use the initialized chat engine to answer the query
    response = str(chat_engine.chat(text))
    return response


embed_setup()
client = qdrant_setup()
llm = llm_setup()

# Point the index at the existing Qdrant collection (documents must already be ingested)
vector_store = QdrantVectorStore(client=client, collection_name=os.getenv("COLLECTION_NAME"))
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
chat_engine = query_index(index, llm)  # initialize the chat engine

t = gr.ChatInterface(get_response, analytics_enabled=True)
t.launch(debug=True, share=True)
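
# ---------------------------------------------------------------------------
# Note: this script assumes the Qdrant collection named by COLLECTION_NAME has
# already been populated with embedded documents. A minimal ingestion sketch
# (run once, separately, before starting the chat UI) might look like the
# following; the "./data" directory is a hypothetical placeholder, and
# `vector_store` is the QdrantVectorStore created above:
#
#   from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
#
#   documents = SimpleDirectoryReader("./data").load_data()
#   storage_context = StorageContext.from_defaults(vector_store=vector_store)
#   VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# ---------------------------------------------------------------------------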