Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -6,13 +6,13 @@ import sqlite3
|
|
6 |
import ocrmypdf
|
7 |
import logging
|
8 |
|
9 |
-
from langchain.document_loaders import OnlinePDFLoader # for loading the
|
10 |
from langchain.embeddings import HuggingFaceEmbeddings # open source embedding model
|
11 |
from langchain.text_splitter import CharacterTextSplitter
|
12 |
-
from
|
13 |
from langchain.chains import RetrievalQA # for QA chain
|
14 |
-
from
|
15 |
-
from langchain_core.prompts import PromptTemplate #
|
16 |
|
17 |
# Setup basic logging
|
18 |
logging.basicConfig(level=logging.INFO)
|
@@ -237,7 +237,6 @@ def answer_query(query):
|
|
237 |
def get_log():
|
238 |
return log_messages
|
239 |
|
240 |
-
# Define simple CSS and title HTML
|
241 |
css = """
|
242 |
#col-container {max-width: 700px; margin: auto;}
|
243 |
"""
|
@@ -249,9 +248,8 @@ title = """
|
|
249 |
</div>
|
250 |
"""
|
251 |
|
252 |
-
# Build the Gradio interface
|
253 |
with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
|
254 |
-
with gr.Column(
|
255 |
gr.HTML(title)
|
256 |
|
257 |
with gr.Tab("Chatbot"):
|
@@ -283,7 +281,6 @@ with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
|
|
283 |
answers_df = gr.Dataframe(label="Pre-defined Answers")
|
284 |
answer_predefined_btn = gr.Button("Get Answers")
|
285 |
|
286 |
-
# Log window to display errors and info
|
287 |
log_window = gr.Textbox(label="Log Window", interactive=False, lines=10)
|
288 |
|
289 |
with gr.Tab("OCR Converter"):
|
@@ -295,7 +292,6 @@ with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
|
|
295 |
|
296 |
with gr.Tab("Upload Question Set"):
|
297 |
with gr.Column():
|
298 |
-
# Now only two document types are available
|
299 |
document_type_for_questionset = gr.Dropdown(choices=["DOC_A", "DOC_B"], label="Select Document Type")
|
300 |
tag_for_questionset = gr.Textbox(label="Name for Question Set (e.g., basic-set)")
|
301 |
csv_file = gr.File(label="Load CSV (fields,question)", file_types=['.csv'], type='filepath')
|
@@ -306,7 +302,6 @@ with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
|
|
306 |
refresh_log_btn = gr.Button("Refresh Log")
|
307 |
refresh_log_btn.click(get_log, outputs=log_window)
|
308 |
|
309 |
-
# Set up button actions
|
310 |
load_pdf_btn.click(load_pdf_and_generate_embeddings, inputs=[pdf_doc, open_ai_key, relevant_pages], outputs=status)
|
311 |
summarize_pdf_btn.click(summarize_contents, outputs=summary)
|
312 |
submit_query_btn.click(answer_query, inputs=input_query, outputs=output_answer)
|
@@ -318,5 +313,4 @@ with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
|
|
318 |
convert_btn.click(ocr_converter, inputs=image_pdf, outputs=ocr_pdf)
|
319 |
load_csv_btn.click(load_csv_and_store_questionset_into_sqlite, inputs=[csv_file, document_type_for_questionset, tag_for_questionset], outputs=status_for_csv)
|
320 |
|
321 |
-
# Launch the Gradio app
|
322 |
demo.launch(debug=True)
|
|
|
6 |
import ocrmypdf
|
7 |
import logging
|
8 |
|
9 |
+
from langchain.document_loaders import OnlinePDFLoader # for loading the PDF
|
10 |
from langchain.embeddings import HuggingFaceEmbeddings # open source embedding model
|
11 |
from langchain.text_splitter import CharacterTextSplitter
|
12 |
+
from langchain_community.vectorstores import Chroma # updated import for vectorization
|
13 |
from langchain.chains import RetrievalQA # for QA chain
|
14 |
+
from langchain_community.chat_models import ChatOpenAI # updated import for ChatOpenAI
|
15 |
+
from langchain_core.prompts import PromptTemplate # updated import per deprecation notice
|
16 |
|
17 |
# Setup basic logging
|
18 |
logging.basicConfig(level=logging.INFO)
|
|
|
237 |
def get_log():
|
238 |
return log_messages
|
239 |
|
|
|
240 |
css = """
|
241 |
#col-container {max-width: 700px; margin: auto;}
|
242 |
"""
|
|
|
248 |
</div>
|
249 |
"""
|
250 |
|
|
|
251 |
with gr.Blocks(css=css, theme=gr.themes.Monochrome()) as demo:
|
252 |
+
with gr.Column(elem_id="col-container"):
|
253 |
gr.HTML(title)
|
254 |
|
255 |
with gr.Tab("Chatbot"):
|
|
|
281 |
answers_df = gr.Dataframe(label="Pre-defined Answers")
|
282 |
answer_predefined_btn = gr.Button("Get Answers")
|
283 |
|
|
|
284 |
log_window = gr.Textbox(label="Log Window", interactive=False, lines=10)
|
285 |
|
286 |
with gr.Tab("OCR Converter"):
|
|
|
292 |
|
293 |
with gr.Tab("Upload Question Set"):
|
294 |
with gr.Column():
|
|
|
295 |
document_type_for_questionset = gr.Dropdown(choices=["DOC_A", "DOC_B"], label="Select Document Type")
|
296 |
tag_for_questionset = gr.Textbox(label="Name for Question Set (e.g., basic-set)")
|
297 |
csv_file = gr.File(label="Load CSV (fields,question)", file_types=['.csv'], type='filepath')
|
|
|
302 |
refresh_log_btn = gr.Button("Refresh Log")
|
303 |
refresh_log_btn.click(get_log, outputs=log_window)
|
304 |
|
|
|
305 |
load_pdf_btn.click(load_pdf_and_generate_embeddings, inputs=[pdf_doc, open_ai_key, relevant_pages], outputs=status)
|
306 |
summarize_pdf_btn.click(summarize_contents, outputs=summary)
|
307 |
submit_query_btn.click(answer_query, inputs=input_query, outputs=output_answer)
|
|
|
313 |
convert_btn.click(ocr_converter, inputs=image_pdf, outputs=ocr_pdf)
|
314 |
load_csv_btn.click(load_csv_and_store_questionset_into_sqlite, inputs=[csv_file, document_type_for_questionset, tag_for_questionset], outputs=status_for_csv)
|
315 |
|
|
|
316 |
demo.launch(debug=True)
|