Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -12,7 +12,8 @@ import io
|
|
12 |
# Initialize spaCy model
|
13 |
nlp = spacy.load('en_core_web_sm')
|
14 |
nlp.add_pipe('sentencizer')
|
15 |
-
|
|
|
16 |
def split_in_sentences(text):
    """Return the sentences of ``text`` as whitespace-trimmed strings.

    The text is run through the module-level ``nlp`` pipeline and every span
    yielded by ``doc.sents`` is converted to ``str`` and stripped.
    """
    sentences = []
    for sentence in nlp(text).sents:
        sentences.append(str(sentence).strip())
    return sentences
|
@@ -44,6 +45,19 @@ def fin_ext_bis(text):
|
|
44 |
results = fin_model_bis(split_in_sentences(text))
|
45 |
return make_spans(text, results)
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def extract_and_paragraph(pdf1, pdf2, paragraph):
|
48 |
if not pdf1 or not pdf2:
|
49 |
return [], []
|
@@ -68,8 +82,6 @@ def extract_and_paragraph(pdf1, pdf2, paragraph):
|
|
68 |
|
69 |
return paragraphs_1, paragraphs_2
|
70 |
|
71 |
-
# Gradio interface setup
|
72 |
-
PDF_FOLDER = "data"
|
73 |
def process_paragraph_1_sum(paragraph):
|
74 |
try:
|
75 |
paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
|
@@ -445,9 +457,10 @@ with gr.Blocks(theme='gradio/soft',js=js_func) as demo:
|
|
445 |
# Upload PDFs
|
446 |
with gr.Column():
|
447 |
gr.Markdown("### Step 1: Upload PDF Files")
|
|
|
448 |
pdf1 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 1")
|
449 |
pdf2 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 2")
|
450 |
-
|
451 |
with gr.Column():
|
452 |
gr.Markdown("### Step 2: Extract and Display Paragraphs")
|
453 |
b1 = gr.Button("Extract and Display Paragraphs")
|
|
|
12 |
# Initialize spaCy model
|
13 |
nlp = spacy.load('en_core_web_sm')
|
14 |
nlp.add_pipe('sentencizer')
|
15 |
+
# Gradio interface setup
|
16 |
+
PDF_FOLDER = "data"
|
17 |
def split_in_sentences(text):
    """Return the sentences of ``text`` as whitespace-trimmed strings.

    The text is run through the module-level ``nlp`` pipeline and every span
    yielded by ``doc.sents`` is converted to ``str`` and stripped.
    """
    sentences = []
    for sentence in nlp(text).sents:
        sentences.append(str(sentence).strip())
    return sentences
|
|
|
45 |
results = fin_model_bis(split_in_sentences(text))
|
46 |
return make_spans(text, results)
|
47 |
|
48 |
+
def upload_file_and_update_dropdown(file):
    """Store an uploaded PDF in ``PDF_FOLDER`` and refresh both PDF dropdowns.

    Args:
        file: Value delivered by the ``gr.File`` upload event. It is handled
            as either a path string or an object whose ``name`` attribute is
            the path of the uploaded temp file (NOTE(review): the exact type
            depends on the installed Gradio version -- confirm). ``None``
            means nothing was uploaded.

    Returns:
        A pair of Gradio update payloads, one per dropdown, both carrying the
        current list returned by ``get_pdf_files(PDF_FOLDER)``.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import shutil

    if file is not None:
        source_path = file if isinstance(file, str) else file.name

        os.makedirs(PDF_FOLDER, exist_ok=True)

        # Keep only the base name: os.path.join() discards PDF_FOLDER when the
        # second argument is absolute, which would make the target the
        # uploaded temp file itself (opening it for writing would truncate it
        # before it is read) and would let the name escape PDF_FOLDER.
        target_path = os.path.join(PDF_FOLDER, os.path.basename(source_path))

        # Copy from disk instead of file.read(), so the result does not depend
        # on the handle's open state or current position.
        if os.path.abspath(source_path) != os.path.abspath(target_path):
            shutil.copyfile(source_path, target_path)

    # Use the same helper that populates the dropdowns initially so the
    # refreshed choices have the same format and filtering. With no upload,
    # the existing files are still listed rather than wiping the choices.
    choices = get_pdf_files(PDF_FOLDER)
    return gr.update(choices=choices), gr.update(choices=choices)
|
60 |
+
|
61 |
def extract_and_paragraph(pdf1, pdf2, paragraph):
|
62 |
if not pdf1 or not pdf2:
|
63 |
return [], []
|
|
|
82 |
|
83 |
return paragraphs_1, paragraphs_2
|
84 |
|
|
|
|
|
85 |
def process_paragraph_1_sum(paragraph):
|
86 |
try:
|
87 |
paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
|
|
|
457 |
# Upload PDFs
|
458 |
with gr.Column():
|
459 |
gr.Markdown("### Step 1: Upload PDF Files")
|
460 |
+
upload_button = gr.File(label="Upload files", file_types=[".pdf"])
|
461 |
pdf1 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 1")
|
462 |
pdf2 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 2")
|
463 |
+
upload_button.upload(upload_file_and_update_dropdown, upload_button, [pdf1, pdf2])
|
464 |
with gr.Column():
|
465 |
gr.Markdown("### Step 2: Extract and Display Paragraphs")
|
466 |
b1 = gr.Button("Extract and Display Paragraphs")
|