niting089 committed on
Commit 22effd2
1 Parent(s): f74cbe7

updated app.py file

Files changed (1)
  1. app.py +35 -11
app.py CHANGED
@@ -42,15 +42,20 @@ HF_TOKEN = os.environ["HF_TOKEN"]
 """
 ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
 ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
- text_loader =
- documents =
+ text_loader = TextLoader("./data/paul_graham_essays.txt")
+ documents = text_loader.load()

 ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
- text_splitter =
- split_documents =
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
+ split_documents = text_splitter.split_documents(documents)
+ print(len(split_documents))

 ### 3. LOAD HUGGINGFACE EMBEDDINGS
- hf_embeddings =
+ hf_embeddings = HuggingFaceEndpointEmbeddings(
+     model=HF_EMBED_ENDPOINT,
+     task="feature-extraction",
+     huggingfacehub_api_token=os.environ["HF_TOKEN"],
+ )

 async def add_documents_async(vectorstore, documents):
     await vectorstore.aadd_documents(documents)
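For reference, the lines added in this hunk assume the loader, splitter, and embedding classes are imported near the top of app.py, outside the diff. A minimal sketch of those imports follows; the module paths are an assumption based on current LangChain packaging and may differ in the repo's pinned versions:

# Imports assumed by the additions above (not shown in this hunk).
# Module paths follow current LangChain packaging and are an assumption here.
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEndpointEmbeddings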
@@ -109,18 +114,37 @@ hf_retriever = asyncio.run(run())
 1. Define a String Template
 2. Create a Prompt Template from the String Template
 """
- ### 1. DEFINE STRING TEMPLATE
- RAG_PROMPT_TEMPLATE =
+ RAG_PROMPT_TEMPLATE = """\
+ <|start_header_id|>system<|end_header_id|>
+ You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>

- ### 2. CREATE PROMPT TEMPLATE
- rag_prompt =
+ <|start_header_id|>user<|end_header_id|>
+ User Query:
+ {query}
+
+ Context:
+ {context}<|eot_id|>
+
+ <|start_header_id|>assistant<|end_header_id|>
+ """
+
+ rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

 # -- GENERATION -- #
 """
 1. Create a HuggingFaceEndpoint for the LLM
 """
 ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
- hf_llm =
+ hf_llm = HuggingFaceEndpoint(
+     endpoint_url=f"{HF_LLM_ENDPOINT}",
+     max_new_tokens=512,
+     top_k=10,
+     top_p=0.95,
+     typical_p=0.95,
+     temperature=0.01,
+     repetition_penalty=1.03,
+     huggingfacehub_api_token=os.environ["HF_TOKEN"]
+ )

 @cl.author_rename
 def rename(original_author: str):
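A quick way to sanity-check the prompt wiring added in this hunk (hypothetical snippet, not part of the commit): PromptTemplate.from_template picks up {query} and {context} as input variables, so the template can be rendered directly with sample values:

# Hypothetical smoke test for the new prompt template; the sample values are made up.
print(rag_prompt.input_variables)  # expected to contain 'context' and 'query'
print(rag_prompt.format(query="What did the author work on?", context="(retrieved chunks go here)"))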
@@ -145,7 +169,7 @@ async def start_chat():
 """

 ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
- lcel_rag_chain =
+ lcel_rag_chain = {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")} | rag_prompt | hf_llm

 cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
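Hypothetical usage of the chain built in this hunk, outside of the Chainlit handlers that drive it in the app (not part of the commit): the chain takes a dict with a "query" key, retrieves context via hf_retriever, fills the prompt, and returns the LLM's text completion.

# Hypothetical invocation; the question is a made-up example against the Paul Graham essays.
answer = lcel_rag_chain.invoke({"query": "What did Paul Graham work on before starting YC?"})
print(answer)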