updated app.py file
app.py CHANGED
@@ -42,15 +42,20 @@ HF_TOKEN = os.environ["HF_TOKEN"]
 """
 ### 1. CREATE TEXT LOADER AND LOAD DOCUMENTS
 ### NOTE: PAY ATTENTION TO THE PATH THEY ARE IN.
-text_loader =
-documents =
+text_loader = TextLoader("./data/paul_graham_essays.txt")
+documents = text_loader.load()

 ### 2. CREATE TEXT SPLITTER AND SPLIT DOCUMENTS
-text_splitter =
-split_documents =
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=30)
+split_documents = text_splitter.split_documents(documents)
+print(len(split_documents))

 ### 3. LOAD HUGGINGFACE EMBEDDINGS
-hf_embeddings =
+hf_embeddings = HuggingFaceEndpointEmbeddings(
+    model=HF_EMBED_ENDPOINT,
+    task="feature-extraction",
+    huggingfacehub_api_token=os.environ["HF_TOKEN"],
+)

 async def add_documents_async(vectorstore, documents):
     await vectorstore.aadd_documents(documents)
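Note: the lines added in this hunk lean on imports defined higher up in app.py that are not shown in the diff. A minimal sketch of what those imports presumably look like (the exact module paths are an assumption and depend on the installed LangChain packages):

# Assumed imports for TextLoader, the splitter, and the endpoint embeddings (not part of this diff).
import os
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEndpointEmbeddings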
@@ -109,18 +114,37 @@ hf_retriever = asyncio.run(run())
 1. Define a String Template
 2. Create a Prompt Template from the String Template
 """
-
-
+RAG_PROMPT_TEMPLATE = """\
+<|start_header_id|>system<|end_header_id|>
+You are a helpful assistant. You answer user questions based on provided context. If you can't answer the question with the provided context, say you don't know.<|eot_id|>

-
-
+<|start_header_id|>user<|end_header_id|>
+User Query:
+{query}
+
+Context:
+{context}<|eot_id|>
+
+<|start_header_id|>assistant<|end_header_id|>
+"""
+
+rag_prompt = PromptTemplate.from_template(RAG_PROMPT_TEMPLATE)

 # -- GENERATION -- #
 """
 1. Create a HuggingFaceEndpoint for the LLM
 """
 ### 1. CREATE HUGGINGFACE ENDPOINT FOR LLM
-hf_llm =
+hf_llm = HuggingFaceEndpoint(
+    endpoint_url=f"{HF_LLM_ENDPOINT}",
+    max_new_tokens=512,
+    top_k=10,
+    top_p=0.95,
+    typical_p=0.95,
+    temperature=0.01,
+    repetition_penalty=1.03,
+    huggingfacehub_api_token=os.environ["HF_TOKEN"]
+)

 @cl.author_rename
 def rename(original_author: str):
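The prompt template added above uses Llama 3 style header/eot tokens and exposes two input variables, {query} and {context}; those names must match the keys assembled by the retrieval map in the final hunk. A quick, hypothetical sanity check (not part of the commit) that renders the prompt with dummy values:

# Hypothetical check -- lists the template's variables and renders it once with placeholder text.
print(rag_prompt.input_variables)  # should contain 'context' and 'query'
print(rag_prompt.format(query="What did the author work on?", context="(retrieved document chunks)"))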
@@ -145,7 +169,7 @@ async def start_chat():
     """

     ### BUILD LCEL RAG CHAIN THAT ONLY RETURNS TEXT
-    lcel_rag_chain =
+    lcel_rag_chain = {"context": itemgetter("query") | hf_retriever, "query": itemgetter("query")} | rag_prompt | hf_llm

     cl.user_session.set("lcel_rag_chain", lcel_rag_chain)
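This hunk only builds the chain and stores it in the Chainlit user session; the handler that consumes it sits outside the diff. A sketch of a typical consuming handler, assuming the app streams the LLM output token by token (illustrative, not taken from this commit):

# Hypothetical @cl.on_message handler -- not part of this commit.
@cl.on_message
async def main(message: cl.Message):
    lcel_rag_chain = cl.user_session.get("lcel_rag_chain")

    msg = cl.Message(content="")
    # The chain expects a dict with a "query" key, matching the itemgetter("query") calls above.
    async for chunk in lcel_rag_chain.astream({"query": message.content}):
        await msg.stream_token(chunk)

    await msg.send()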