from llama_index.core import Settings, VectorStoreIndex
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.vector_stores.qdrant import QdrantVectorStore
from llama_index.llms.gemini import Gemini
from llama_index.embeddings.gemini import GeminiEmbedding
import qdrant_client
import gradio as gr
import os


def embed_setup():
    # Register the Gemini embedding model and a low-temperature LLM as the global defaults
    Settings.embed_model = GeminiEmbedding(
        api_key=os.getenv("GEMINI_API_KEY"), model_name="models/embedding-001"
    )
    Settings.llm = Gemini(
        api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.1,
        model_name="models/gemini-pro",
    )


def qdrant_setup():
    # Connect to the hosted Qdrant instance using credentials from the environment
    client = qdrant_client.QdrantClient(
        os.getenv("QDRANT_URL"),
        api_key=os.getenv("QDRANT_API_KEY"),
    )
    return client


def llm_setup():
    # A slightly more creative LLM used by the chat engine
    llm = Gemini(
        api_key=os.getenv("GEMINI_API_KEY"),
        temperature=0.6,
        model_name="models/gemini-pro",
    )
    return llm


def query_index(index, llm, similarity_top_k=3):
    # Build a context chat engine with a rolling memory buffer and a custom system prompt
    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    chat_engine = index.as_chat_engine(
        chat_mode="context",
        llm=llm,
        memory=memory,
        similarity_top_k=similarity_top_k,
        system_prompt=(
            """You are an AI assistant named Buildspace_Bot, created by Buildspace. Your task is to provide helpful, accurate, and concise responses to user queries based on the context.

welcome to buildspace. a home for ppl bringing their wildest ideas to life. it all begins online. on nights and weekends. this is where you will take any idea you’re excited about, figure out how to bring it to life, and turn it into something that people give a shit about — alongside thousands of others.

Context information is below:
----------------
{context_str}
----------------
Always answer based on the information in the context and be precise.
Given this context, please respond to the following user query:
{query_str}
Also suggest 3 more questions based on the context that the user can ask.
Your response:"""
        ),
    )
    return chat_engine


def get_response(text, history=None):
    # Use the initialized chat engine to answer the query
    response = str(chat_engine.chat(text))
    return response


embed_setup()
client = qdrant_setup()
llm = llm_setup()

# Point the index at the existing Qdrant collection (documents must already be ingested)
vector_store = QdrantVectorStore(client=client, collection_name=os.getenv("COLLECTION_NAME"))
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
chat_engine = query_index(index, llm)  # initialize the chat engine

t = gr.ChatInterface(get_response, analytics_enabled=True)
t.launch(debug=True, share=True)
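
# ---------------------------------------------------------------------------
# Note: this script assumes the Qdrant collection named by COLLECTION_NAME has
# already been populated with embedded documents. A minimal ingestion sketch
# (run once, separately, before starting the chat UI) might look like the
# following; the "./data" directory is a hypothetical placeholder, and
# `vector_store` is the QdrantVectorStore created above:
#
#   from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
#
#   documents = SimpleDirectoryReader("./data").load_data()
#   storage_context = StorageContext.from_defaults(vector_store=vector_store)
#   VectorStoreIndex.from_documents(documents, storage_context=storage_context)
# ---------------------------------------------------------------------------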