import os
import zipfile
import gradio as gr

from langchain_community.llms import HuggingFaceEndpoint
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma  # Lightweight vector store
from langchain.chains import RetrievalQA


# Authentication for the Hugging Face Inference API.
# HF_TOKEN must be provided as an environment variable (e.g. a Space secret).

HF_TOKEN = os.getenv("HF_TOKEN")
if HF_TOKEN is None:
    raise RuntimeError("HF_TOKEN environment variable is not set")
os.environ["HF_TOKEN"] = HF_TOKEN
os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN


# Initialization of the LLM

llm = HuggingFaceEndpoint(
    repo_id="HuggingFaceH4/zephyr-7b-beta",
    task="text-generation",
    max_new_tokens=512,
    top_k=30,
    temperature=0.1,
    repetition_penalty=1.03,
)
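
# Optional sanity check: the endpoint can be queried directly, e.g.
#   print(llm.invoke("What is retrieval-augmented generation?"))
# Generation runs on the hosted Inference API, so no local GPU is needed here.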

## Embeddings

# Embedding model used to query the persisted Chroma index.
# modelPath = "sentence-transformers/all-MiniLM-l6-v2"  # smaller/faster alternative
modelPath = "mixedbread-ai/mxbai-embed-large-v1"

# Model configuration: run the embedding model on the CPU ('cuda' if a GPU is available)
model_kwargs = {'device': 'cpu'}

# Encoding options: keep raw (unnormalized) embedding vectors
encode_kwargs = {'normalize_embeddings': False}

embedding = HuggingFaceEmbeddings(
    model_name=modelPath,        # Name/path of the pre-trained embedding model
    model_kwargs=model_kwargs,   # Model configuration options
    encode_kwargs=encode_kwargs  # Encoding options
)
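
# Note: this embedding model should match the one used when the Chroma index in
# docs.zip was built; querying with a different model yields meaningless scores.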

# Load the vector DB built in the previous step (shipped as docs.zip) and unzip it

with zipfile.ZipFile('docs.zip', 'r') as zip_ref:
    zip_ref.extractall()

persist_directory = 'docs/chroma/'

vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding
)
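
# Optional sanity check (commented out): confirm the persisted index loads and
# returns hits before wiring it into the UI.
# docs = vectordb.similarity_search("test query", k=2)
# print(len(docs))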


title = "Q&A on enterprise data"
description = "Implementation of open-source RAG on private documents"

def quena(question):
    # Retrieve relevant chunks from Chroma and let the LLM answer from them.
    qa_chain = RetrievalQA.from_chain_type(
        llm, retriever=vectordb.as_retriever(), return_source_documents=True
    )
    result = qa_chain.invoke({"query": question})
    return result["result"]
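
# Note: the chain is rebuilt on every request for simplicity. If latency matters,
# it could be constructed once at module level and reused, e.g.:
#   qa_chain = RetrievalQA.from_chain_type(llm, retriever=vectordb.as_retriever())
#   def quena(question):
#       return qa_chain.invoke({"query": question})["result"]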
    
demo = gr.Interface(
    fn=quena,
    inputs=gr.Textbox(
        lines=10,
        placeholder="Type your question here. Sample question:\n"
                    "What are the procedures to move from research to production environment? "
                    "Reply in step-wise pointers.",
    ),
    outputs="text",
    title=title,
    description=description,
)
# Launch the demo!
demo.launch()
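
# On Hugging Face Spaces the default launch() is sufficient; when running locally,
# demo.launch(share=True) creates a temporary public link.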