import os
import zipfile
import gradio as gr
from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma  # lightweight, in-memory vector store
from langchain.chains import RetrievalQA
# Authentication for the Hugging Face API
HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise RuntimeError("Set the HF_TOKEN environment variable (e.g. as a Space secret).")
os.environ["HF_TOKEN"] = HF_TOKEN
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN
# Initialization of LLM
llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    top_k=30,
    temperature=0.1,
    repetition_penalty=1.03,
)
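# Optional smoke test (a minimal sketch, assuming the endpoint is reachable
# with the token above); uncomment to verify the LLM responds before wiring
# it into the RAG chain.
# print(llm.invoke("Briefly explain retrieval-augmented generation."))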
## Embeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
# modelPath = "sentence-transformers/all-MiniLM-l6-v2"  # smaller alternative
modelPath = "mixedbread-ai/mxbai-embed-large-v1"
# Run the embedding model on CPU ('cuda' if a GPU is available)
model_kwargs = {'device': 'cpu'}
# Keep raw (unnormalized) embeddings
encode_kwargs = {'normalize_embeddings': False}
embedding = HuggingFaceEmbeddings(
    model_name=modelPath,         # pre-trained model name or path
    model_kwargs=model_kwargs,    # model configuration options
    encode_kwargs=encode_kwargs   # encoding options
)
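# Optional sanity check: embed a sample string and inspect the vector size
# (1024 dimensions for mxbai-embed-large-v1). Uncomment to run locally.
# sample_vector = embedding.embed_query("hello world")
# print(len(sample_vector))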
# Unpack the vector DB built in the previous step
with zipfile.ZipFile('docs.zip', 'r') as zip_ref:
    zip_ref.extractall()
persist_directory = 'docs/chroma/'
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)
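# Optional sanity check (assumes the unzipped index was built with the same
# embedding model): a generic query should return stored documents. Uncomment to run.
# docs = vectordb.similarity_search("test", k=2)
# print(len(docs))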
title = "Q&A on enterprise data"
description = "Implementation of Open-Source RAG on Private Documents"
def quena(question):
    """Answer a question by retrieving relevant chunks from the vector DB."""
    # The chain is rebuilt per call; this is cheap since the LLM and retriever are shared.
    qa_chain = RetrievalQA.from_chain_type(
        llm,
        retriever=vectordb.as_retriever(),
        return_source_documents=True
    )
    result = qa_chain.invoke({"query": question})
    return result["result"]
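# Quick local test of the QA function (hypothetical question; any string works):
# print(quena("What are the procedures to move from research to production?"))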
demo = gr.Interface(
    fn=quena,
    inputs=gr.Textbox(
        lines=10,
        placeholder=("Type your question here. Sample question:\n"
                     "What are the procedures to move from the research to the "
                     "production environment? Reply in step-wise pointers.")
    ),
    outputs="text",
    title=title,
    description=description,
)
# Launch the demo!
demo.launch()