# Gradio version: RAG chat over a MongoDB Atlas vector store with a local Ollama LLM
import os

import gradio as gr
from pymongo.mongo_client import MongoClient
from llama_index.vector_stores.mongodb import MongoDBAtlasVectorSearch
from llama_index.core.vector_stores.types import VectorStoreQuery
from langchain_nomic.embeddings import NomicEmbeddings
###### load LLM
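# Assumption: an Ollama server is already running in this environment; `ollama pull`
# downloads the model weights up front so the first request does not trigger a download.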
os.system("ollama pull llama3.2:3b-instruct-fp16")
# LLM
from langchain_ollama import ChatOllama

local_llm = "llama3.2:3b-instruct-fp16"
llm = ChatOllama(model=local_llm, temperature=0)
llm_json_mode = ChatOllama(model=local_llm, temperature=0, format="json")
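# `llm` is used for generation below; `llm_json_mode` (format="json") is available for
# structured-output steps such as routing or grading, but is not used in this chat flow.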

# Load embedding model (Nomic Embed, run locally)
embed_model = NomicEmbeddings(model="nomic-embed-text-v1.5", inference_mode="local")
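# Assumption: nomic-embed-text-v1.5 returns 768-dimensional vectors by default; the documents
# stored in Atlas must have been embedded with the same model so query and stored vectors match.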

# Load vector database

MONGO_URI = os.getenv("MONGO_URI")
os.environ["MONGODB_URI"] = MONGO_URI
DB_NAME = os.getenv("DB_NAME")
COLLECTION_NAME = os.getenv("COLLECTION_NAME")
# Connect to the Atlas deployment
mongo_client = MongoClient(MONGO_URI)
collection = mongo_client[DB_NAME][COLLECTION_NAME]

vector_store = MongoDBAtlasVectorSearch(
    mongo_client,
    db_name=DB_NAME,
    collection_name=COLLECTION_NAME,
    vector_index_name="default",
)
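# The "default" vector index must already exist on the collection. A minimal sketch of the
# Atlas Vector Search index definition this assumes (the field path "embedding" and cosine
# similarity are assumptions; match them to how the documents were ingested):
#
#   {
#     "fields": [
#       {"type": "vector", "path": "embedding", "numDimensions": 768, "similarity": "cosine"}
#     ]
#   }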

# COMPONENT
### Generate
from langchain_core.messages import HumanMessage

# Prompt
rag_prompt = """Você é um assistente multilíngue para tarefas de resposta a perguntas.

Aquí está o contexto a ser usado para responder à pergunta:

{context} 

Pense cuidadosamente acerca do contexto de acima.

Agora, revise a pergunta do usuario:

{question}

Forneça uma resposta a essas perguntas usando apenas o contexto acima. 

Mantenha sua resposta formal e concisa.

Resposta:"""

# Post-processing: join the retrieved node texts into a single context string
def format_docs(nodes):
    return "\n\n".join(node.text for node in nodes)

########### FOR CHAT
def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    top_k,
):
    # Rebuild the conversation so far (system message + prior turns)
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Retrieve the top-k most similar nodes from the Atlas vector store
    question = message
    query_embedding = embed_model.embed_query(question)
    vector_store_query = VectorStoreQuery(
        query_embedding=query_embedding, similarity_top_k=int(top_k)
    )
    query_results = vector_store.query(vector_store_query)
    docs = query_results.nodes

    # Build the RAG prompt from the retrieved context and generate the answer,
    # passing the prior conversation along with the formatted prompt
    docs_txt = format_docs(docs)
    rag_prompt_formatted = rag_prompt.format(context=docs_txt, question=question)
    generation = llm.invoke(messages + [HumanMessage(content=rag_prompt_formatted)])
    return generation.content
#
"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="What is your question?", label="System message"),
        gr.Slider(minimum=1, maximum=10, value=3, step=1, label="top-k"),
    ],
)


if __name__ == "__main__":
    demo.launch()