# Import the necessary libraries.
import os

import gradio as gr
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

# Initialize a ChatGroq client with a temperature of 0 and the "llama3-70b-8192" model.
groq_api_key = os.environ.get("my_groq_api_key")
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=groq_api_key)

# Embed queries locally with a sentence-transformers model.
embeddings = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"trust_remote_code": True},
)

# Keep only the last 3 exchanges in the conversation history.
memory = ConversationBufferWindowMemory(memory_key="history", k=3)

# Load the Chroma index previously generated in this directory.
CHROMA_PATH = "chroma8"
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)


def build_prompt(query_text: str, k: int = 2) -> PromptTemplate:
    """Retrieve the documents most similar to the query and bake them into a prompt."""
    results = db.similarity_search_with_relevance_scores(query_text, k=k)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    # Escape braces in retrieved text so PromptTemplate does not treat them as variables.
    context_text = context_text.replace("{", "{{").replace("}", "}}")
    template = (
        "The following is a conversation between a human and an AI. "
        "Answer the question based only on the conversation and the context below.\n"
        "Current conversation:\n"
        "{history}\n"
    )
    suffix = "question: {input}\nanswer:"
    return PromptTemplate(
        input_variables=["history", "input"],
        template=template + context_text + "\n" + suffix,
    )


# Sanity-check retrieval against the index before wiring up the chain.
results = db.similarity_search_with_relevance_scores("what did alice say to rabbit", k=2)
if len(results) == 0 or results[0][1] < 0.5:
    print("Unable to find matching results.")

# Build the chain with an initial prompt; it is rebuilt for every incoming message.
prompt = build_prompt("when did alice see mad hatter", k=3)
chain = ConversationChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
    verbose=True,
)


def get_llama_response(message: str, history: list | None = None) -> str:
    """
    Generate a conversational response from the Llama model.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (unused; the chain's memory
            already tracks the last few exchanges).

    Returns:
        str: Generated response from the Llama model.
    """
    # Rebuild the prompt so the retrieved context matches the current message.
    chain.prompt = build_prompt(message)
    return chain.predict(input=message)


iface = gr.Interface(fn=get_llama_response, inputs=gr.Textbox(), outputs="textbox")
iface.launch(share=True)
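

# The script assumes a persisted Chroma index already exists at CHROMA_PATH
# ("chroma8"). Below is a minimal sketch of how such an index might be built,
# assuming the source text lives in a local plain-text file (the filename,
# chunk size, and overlap are illustrative, not from the original script).
# Run build_index() once, separately, before launching the app.
def build_index(source_path: str = "alice_in_wonderland.txt") -> None:
    """One-off helper: split a source text and persist it as a Chroma index."""
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import TextLoader

    # Load the raw text and split it into overlapping chunks for retrieval.
    docs = TextLoader(source_path, encoding="utf-8").load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    ).split_documents(docs)
    # Embed the chunks with the same model used at query time and persist them.
    Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)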