# Import the necessary libraries.
import os

import gradio as gr
from langchain.chains import ConversationChain
from langchain.chains.conversation.memory import ConversationBufferWindowMemory
from langchain_community.embeddings import SentenceTransformerEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
from langchain_groq import ChatGroq

# Initialize a ChatGroq client with a temperature of 0 and the "llama3-70b-8192" model.
groq_api_key = os.environ.get("my_groq_api_key")
llm = ChatGroq(temperature=0, model_name="llama3-70b-8192", api_key=groq_api_key)

# Embed queries locally with a sentence-transformers model.
embeddings = SentenceTransformerEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"trust_remote_code": True},
)

# Keep only the last 3 exchanges in the conversation history.
memory = ConversationBufferWindowMemory(memory_key="history", k=3)

# Load the Chroma index previously generated in this directory.
CHROMA_PATH = "chroma8"
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=embeddings)


def build_prompt(query_text: str, k: int = 2) -> PromptTemplate:
    """Retrieve the documents most similar to the query and bake them into a prompt."""
    results = db.similarity_search_with_relevance_scores(query_text, k=k)
    if len(results) == 0 or results[0][1] < 0.5:
        print("Unable to find matching results.")
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    # Escape braces in retrieved text so PromptTemplate does not treat them as variables.
    context_text = context_text.replace("{", "{{").replace("}", "}}")
    template = (
        "The following is a conversation between a human and an AI. "
        "Answer the question based only on the conversation and the context below.\n"
        "Current conversation:\n"
        "{history}\n"
    )
    suffix = "question: {input}\nanswer:"
    return PromptTemplate(
        input_variables=["history", "input"],
        template=template + context_text + "\n" + suffix,
    )


# Sanity-check retrieval against the index before wiring up the chain.
results = db.similarity_search_with_relevance_scores("what did alice say to rabbit", k=2)
if len(results) == 0 or results[0][1] < 0.5:
    print("Unable to find matching results.")

# Build the chain with an initial prompt; it is rebuilt for every incoming message.
prompt = build_prompt("when did alice see mad hatter", k=3)
chain = ConversationChain(
    llm=llm,
    prompt=prompt,
    memory=memory,
    verbose=True,
)


def get_llama_response(message: str, history: list | None = None) -> str:
    """
    Generate a conversational response from the Llama model.

    Parameters:
        message (str): User's input message.
        history (list): Past conversation history (unused; the chain's memory
            already tracks the last few exchanges).

    Returns:
        str: Generated response from the Llama model.
    """
    # Rebuild the prompt so the retrieved context matches the current message.
    chain.prompt = build_prompt(message)
    return chain.predict(input=message)


iface = gr.Interface(fn=get_llama_response, inputs=gr.Textbox(), outputs="textbox")
iface.launch(share=True)
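

# The script assumes a persisted Chroma index already exists at CHROMA_PATH
# ("chroma8"). Below is a minimal sketch of how such an index might be built,
# assuming the source text lives in a local plain-text file (the filename,
# chunk size, and overlap are illustrative, not from the original script).
# Run build_index() once, separately, before launching the app.
def build_index(source_path: str = "alice_in_wonderland.txt") -> None:
    """One-off helper: split a source text and persist it as a Chroma index."""
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain_community.document_loaders import TextLoader

    # Load the raw text and split it into overlapping chunks for retrieval.
    docs = TextLoader(source_path, encoding="utf-8").load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=200
    ).split_documents(docs)
    # Embed the chunks with the same model used at query time and persist them.
    Chroma.from_documents(chunks, embeddings, persist_directory=CHROMA_PATH)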