import gradio as gr
import os
from dotenv import load_dotenv
load_dotenv()
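# The .env file is expected to provide huggingfacehub_api_token, which is read
# via os.getenv() when the LLM endpoint is configured below.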
# Load the preprocessed JSON data that will be fed into Chroma
import json

with open("data/processed/final_data_for_vectorstore.json", "r") as file:
    data4chroma = json.load(file)
# Initialize the vector store from the preprocessed chunks
from langchain_community.vectorstores import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

embedding_function = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_texts(
    texts=data4chroma["chunks"],
    embedding=embedding_function,
    ids=data4chroma["chunk_ids"],
    metadatas=data4chroma["chunk_metadatas"],
    collection_name="qual_books",
)
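
# Illustrative only: a quick sanity check that the store returns relevant
# chunks (similarity_search is a standard LangChain vector-store method);
# the query string is a placeholder.
# docs = vectorstore.similarity_search("example query", k=3)
# print(docs[0].page_content)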
# Prompt template: instructs the model to stay grounded in the retrieved context
from langchain_core.prompts import ChatPromptTemplate

template = """You are a helpful AI assistant. Please answer the query based on the provided context.
*Do not make assumptions if you don't know the answer. In that case, respond by saying that \
the answer to the query cannot be found in the given context.
*The English of the provided text is not well-structured. Respond with the same content in \
improved, clear, and correct English, without simply copying the original text.
*Provide the response in bullet points, with detail where necessary.
Context: {context}
Query: {question}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)
# LLM endpoint; the near-zero temperature keeps answers close to the retrieved context
from langchain_huggingface import HuggingFaceEndpoint

llm = HuggingFaceEndpoint(
    repo_id="meta-llama/Meta-Llama-3.1-70B-Instruct",
    max_new_tokens=3000,
    top_k=20,
    top_p=0.95,
    typical_p=0.95,
    temperature=0.001,
    repetition_penalty=1.03,
    huggingfacehub_api_token=os.getenv("huggingfacehub_api_token"),
)
chain = prompt | llm
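
# Illustrative only: the chain's invoke() takes a dict with the template's two
# variables; the strings here are placeholders, not real data.
# answer = chain.invoke({"context": "some retrieved text", "question": "a user question"})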

def respond(
    query: str,
    data_type: str = "Preprocessed doc",
    llm_chain=chain,
    vectorstore=vectorstore,
):
    """
    Generate a response to a user query using document retrieval and
    language-model completion.

    Parameters:
        query (str): The user's query.
        data_type (str): Type of data used for document retrieval.
        llm_chain: The prompt | llm chain used to generate the answer.
        vectorstore: The Chroma vector store used to retrieve context.

    Returns:
        str: The model's answer, grounded in the retrieved documents.
    """
    # Retrieve context with maximal marginal relevance: fetch 100 candidates,
    # then keep the 10 most relevant-yet-diverse chunks
    if data_type == "Preprocessed doc":
        retriever = vectorstore.as_retriever(
            search_type="mmr", search_kwargs={"k": 10, "fetch_k": 100}
        )
        retrieved_docs = retriever.invoke(query)
        input_2_chain = {"context": retrieved_docs, "question": query}
        response = llm_chain.invoke(input_2_chain)
        return response
    # Guard against unsupported data types instead of failing with a NameError
    return f"Unsupported data_type: {data_type!r}"
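
# Illustrative only: a quick local test of the pipeline; the query below is a
# placeholder, not drawn from the actual dataset.
# print(respond("What is thematic analysis?"))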
# Minimal Gradio UI: one text input (the query), one text output (the answer)
demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch(share=True)