Update app.py
Browse files
app.py
CHANGED
|
@@ -20,15 +20,6 @@ import torch
|
|
| 20 |
import tqdm
|
| 21 |
import accelerate
|
| 22 |
|
| 23 |
-
#Set parameters
|
| 24 |
-
|
| 25 |
-
llm_model = 'mistralai/Mixtral-8x7B-Instruct-v0.1'
|
| 26 |
-
list_file_obj = '/home/user/app/pdfs/'
|
| 27 |
-
slider_chunk_size = 1024
|
| 28 |
-
slider_chunk_overlap = 128
|
| 29 |
-
temperature = 0.1
|
| 30 |
-
max_tokens = 6000
|
| 31 |
-
top_k = 3
|
| 32 |
|
| 33 |
|
| 34 |
# default_persist_directory = './chroma_HF/'
|
|
@@ -247,29 +238,64 @@ def demo():
|
|
| 247 |
vector_db = gr.State()
|
| 248 |
qa_chain = gr.State()
|
| 249 |
collection_name = gr.State()
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 257 |
with gr.Row():
|
| 258 |
-
|
| 259 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 260 |
with gr.Row():
|
| 261 |
-
|
| 262 |
-
source2_page = gr.Number(label="Page", scale=1)
|
| 263 |
with gr.Row():
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
with gr.Row():
|
| 267 |
-
msg = gr.Textbox(placeholder="Type message", container=True)
|
| 268 |
-
with gr.Row():
|
| 269 |
-
db_btn = gr.Button("Generate vector database...")
|
| 270 |
-
qachain_btn = gr.Button("Initialize question-answering chain...")
|
| 271 |
-
submit_btn = gr.Button("Submit")
|
| 272 |
-
clear_btn = gr.ClearButton([msg, chatbot])
|
| 273 |
|
| 274 |
# Preprocessing events
|
| 275 |
#upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
|
|
|
|
| 20 |
import tqdm
|
| 21 |
import accelerate
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
| 24 |
|
| 25 |
# default_persist_directory = './chroma_HF/'
|
|
|
|
| 238 |
vector_db = gr.State()
|
| 239 |
qa_chain = gr.State()
|
| 240 |
collection_name = gr.State()
|
| 241 |
+
|
| 242 |
+
gr.Markdown(
|
| 243 |
+
"""<center><h2>PDF-based chatbot (powered by LangChain and open-source LLMs)</h2></center>
|
| 244 |
+
<h3>Ask any questions about your PDF documents, along with follow-ups</h3>
|
| 245 |
+
<b>Note:</b> This AI assistant performs retrieval-augmented generation from your PDF documents. \
|
| 246 |
+
When generating answers, it takes past questions into account (via conversational memory), and includes document references for clarity.
|
| 247 |
+
<br><b>Warning:</b> This space uses the free CPU Basic hardware from Hugging Face. Some steps and LLM models used below (free inference endpoints) can take some time to generate an output.<br>
|
| 248 |
+
""")
|
| 249 |
+
with gr.Tab("Step 1 - Document pre-processing"):
|
| 250 |
+
with gr.Row():
|
| 251 |
+
document = gr.Files(value = '/home/user/app/pdfs/Annual-Report-2022-2023-English_1.pdf',visible=True,
|
| 252 |
+
height=100, file_count="multiple", file_types=["pdf"], interactive=True, label="Upload your PDF documents (single or multiple)")
|
| 253 |
+
# upload_btn = gr.UploadButton("Loading document...", height=100, file_count="multiple", file_types=["pdf"], scale=1)
|
| 254 |
+
with gr.Row():
|
| 255 |
+
db_btn = gr.Radio(["ChromaDB"], label="Vector database type", value = "ChromaDB", type="index", info="Choose your vector database")
|
| 256 |
+
with gr.Accordion("Advanced options - Document text splitter", open=False):
|
| 257 |
+
with gr.Row():
|
| 258 |
+
slider_chunk_size = gr.Slider(minimum = 100, maximum = 1000, value=600, step=20, label="Chunk size", info="Chunk size", interactive=True)
|
| 259 |
+
with gr.Row():
|
| 260 |
+
slider_chunk_overlap = gr.Slider(minimum = 10, maximum = 200, value=40, step=10, label="Chunk overlap", info="Chunk overlap", interactive=True)
|
| 261 |
+
with gr.Row():
|
| 262 |
+
db_progress = gr.Textbox(label="Vector database initialization", value="None")
|
| 263 |
with gr.Row():
|
| 264 |
+
db_btn = gr.Button("Generate vector database...")
|
| 265 |
+
|
| 266 |
+
with gr.Tab("Step 2 - QA chain initialization"):
|
| 267 |
+
with gr.Row():
|
| 268 |
+
llm_btn = gr.Radio(list_llm_simple, \
|
| 269 |
+
label="LLM models", value = list_llm_simple[0], type="index", info="Choose your LLM model")
|
| 270 |
+
with gr.Accordion("Advanced options - LLM model", open=False):
|
| 271 |
+
with gr.Row():
|
| 272 |
+
slider_temperature = gr.Slider(minimum = 0.0, maximum = 1.0, value=0.7, step=0.1, label="Temperature", info="Model temperature", interactive=True)
|
| 273 |
+
with gr.Row():
|
| 274 |
+
slider_maxtokens = gr.Slider(minimum = 224, maximum = 4096, value=1024, step=32, label="Max Tokens", info="Model max tokens", interactive=True)
|
| 275 |
+
with gr.Row():
|
| 276 |
+
slider_topk = gr.Slider(minimum = 1, maximum = 10, value=3, step=1, label="top-k samples", info="Model top-k samples", interactive=True)
|
| 277 |
+
with gr.Row():
|
| 278 |
+
llm_progress = gr.Textbox(value="None",label="QA chain initialization")
|
| 279 |
+
with gr.Row():
|
| 280 |
+
qachain_btn = gr.Button("Initialize question-answering chain...")
|
| 281 |
+
|
| 282 |
+
with gr.Tab("Step 3 - Conversation with chatbot"):
|
| 283 |
+
chatbot = gr.Chatbot(height=300)
|
| 284 |
+
with gr.Accordion("Advanced - Document references", open=False):
|
| 285 |
+
with gr.Row():
|
| 286 |
+
doc_source1 = gr.Textbox(label="Reference 1", lines=2, container=True, scale=20)
|
| 287 |
+
source1_page = gr.Number(label="Page", scale=1)
|
| 288 |
+
with gr.Row():
|
| 289 |
+
doc_source2 = gr.Textbox(label="Reference 2", lines=2, container=True, scale=20)
|
| 290 |
+
source2_page = gr.Number(label="Page", scale=1)
|
| 291 |
+
with gr.Row():
|
| 292 |
+
doc_source3 = gr.Textbox(label="Reference 3", lines=2, container=True, scale=20)
|
| 293 |
+
source3_page = gr.Number(label="Page", scale=1)
|
| 294 |
with gr.Row():
|
| 295 |
+
msg = gr.Textbox(placeholder="Type message", container=True)
|
|
|
|
| 296 |
with gr.Row():
|
| 297 |
+
submit_btn = gr.Button("Submit")
|
| 298 |
+
clear_btn = gr.ClearButton([msg, chatbot])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 299 |
|
| 300 |
# Preprocessing events
|
| 301 |
#upload_btn.upload(upload_file, inputs=[upload_btn], outputs=[document])
|