Cachoups committed on
Commit
3373145
·
verified ·
1 Parent(s): 019952c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -67
app.py CHANGED
@@ -302,7 +302,7 @@ stored_df1 = []
302
  stored_df2 = []
303
 
304
  with gr.Blocks() as demo:
305
- with gr.Tab("Contents"):
306
  gr.Markdown("""
307
  ## Macro-economy Adverse Scenario Comparison from EBA Reports
308
 
@@ -322,108 +322,72 @@ with gr.Blocks() as demo:
322
  - Select two Excel files and a sheet name.
323
  - For the two selected tables, compute the difference of the cumulative adverse growth rate over their respective three years for the selected sheet name (topic).
324
  - For the selected topic (sheet name), find related sentences in the associated PDF text that mention the topic, and classify them by sentiment.
325
- - For a selected country and topic, describe the adverse growth rate trend over three years using the [**google/flan-t5-base** model](https://huggingface.co/google/flan-t5-base).
326
  """)
327
  with gr.Tab("Financial Report Text Analysis"):
328
- gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
329
 
330
  with gr.Row():
331
- # Upload PDFs
332
  with gr.Column():
 
333
  pdf1 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 1")
334
  pdf2 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 2")
335
-
 
336
  with gr.Column():
337
- b1 = gr.Button("Extract and Display Paragraphs")
338
  paragraph_1_dropdown = gr.Dropdown(label="Select Paragraph from PDF 1")
339
  paragraph_2_dropdown = gr.Dropdown(label="Select Paragraph from PDF 2")
340
 
341
  def update_paragraphs(pdf1, pdf2):
342
- global stored_paragraphs_1, stored_paragraphs_2
343
  stored_paragraphs_1, stored_paragraphs_2 = extract_and_paragraph(pdf1, pdf2, True)
344
- updated_dropdown_1 = [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_1)]
345
- updated_dropdown_2 = [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_2)]
346
- return gr.update(choices=updated_dropdown_1), gr.update(choices=updated_dropdown_2)
347
 
348
- b1.click(fn=update_paragraphs, inputs=[pdf1, pdf2], outputs=[paragraph_1_dropdown, paragraph_2_dropdown])
349
 
350
  with gr.Row():
351
- # Process the selected paragraph from PDF 1
352
  with gr.Column():
353
  gr.Markdown("### PDF 1 Analysis")
354
- selected_paragraph_1 = gr.Textbox(label="Selected Paragraph 1 Content", lines=4)
355
  summarize_btn1 = gr.Button("Summarize Text from PDF 1")
356
  summary_textbox_1 = gr.Textbox(label="Summary for PDF 1", lines=2)
357
- summarize_btn1.click(fn=lambda p: process_paragraph_1_sum(p), inputs=paragraph_1_dropdown, outputs=summary_textbox_1)
358
  sentiment_btn1 = gr.Button("Classify Financial Tone from PDF 1")
359
- sentiment_textbox_1 = gr.Textbox(label="Classification for PDF 1", lines=1)
360
- sentiment_btn1.click(fn=lambda p: process_paragraph_1_sent(p), inputs=paragraph_1_dropdown, outputs=sentiment_textbox_1)
361
- analyze_btn1 = gr.Button("Analyze Financial Tone on each sentence with yiyanghkust/finbert-tone")
362
- fin_spans_1 = gr.HighlightedText(label="Financial Tone Analysis for PDF 1")
363
- analyze_btn1.click(fn=lambda p: process_paragraph_1_sent_tone(p), inputs=paragraph_1_dropdown, outputs=fin_spans_1)
364
- analyze_btn1_ = gr.Button("Analyze Financial Tone on each sentence with ProsusAI/finbert")
365
- fin_spans_1_ = gr.HighlightedText(label="Financial Tone Analysis for PDF 1 bis")
366
- analyze_btn1_.click(fn=lambda p: process_paragraph_1_sent_tone_bis(p), inputs=paragraph_1_dropdown, outputs=fin_spans_1_)
367
-
368
- # Process the selected paragraph from PDF 2
369
  with gr.Column():
370
  gr.Markdown("### PDF 2 Analysis")
371
- selected_paragraph_2 = gr.Textbox(label="Selected Paragraph 2 Content", lines=4)
372
- selected_paragraph_2.change(show, paragraph_2_dropdown, selected_paragraph_2)
373
  summarize_btn2 = gr.Button("Summarize Text from PDF 2")
374
  summary_textbox_2 = gr.Textbox(label="Summary for PDF 2", lines=2)
375
- summarize_btn2.click(fn=lambda p: process_paragraph_2_sum(p), inputs=paragraph_2_dropdown, outputs=summary_textbox_2)
376
  sentiment_btn2 = gr.Button("Classify Financial Tone from PDF 2")
377
- sentiment_textbox_2 = gr.Textbox(label="Classification for PDF 2", lines=1)
378
- sentiment_btn2.click(fn=lambda p: process_paragraph_2_sent(p), inputs=paragraph_2_dropdown, outputs=sentiment_textbox_2)
379
- analyze_btn2 = gr.Button("Analyze Financial Tone on each sentence with yiyanghkust/finbert-tone")
380
- fin_spans_2 = gr.HighlightedText(label="Financial Tone Analysis for PDF 2")
381
- analyze_btn2.click(fn=lambda p: process_paragraph_2_sent_tone(p), inputs=paragraph_2_dropdown, outputs=fin_spans_2)
382
- analyze_btn2_ = gr.Button("Analyze Financial Tone on each sentence with ProsusAI/finbert")
383
- fin_spans_2_ = gr.HighlightedText(label="Financial Tone Analysis for PDF 2 bis")
384
- analyze_btn2_.click(fn=lambda p: process_paragraph_2_sent_tone_bis(p), inputs=paragraph_2_dropdown, outputs=fin_spans_2_)
385
 
386
  with gr.Tab("Financial Report Table Analysis"):
387
- # New tab content goes here
388
- gr.Markdown("## Excel Data Comparison")
389
 
390
  with gr.Row():
391
  with gr.Column():
 
392
  file1 = gr.Dropdown(choices=get_excel_files(PDF_FOLDER), label="Select Excel File 1")
393
  file2 = gr.Dropdown(choices=get_excel_files(PDF_FOLDER), label="Select Excel File 2")
394
- sheet = gr.Dropdown(choices=["GDP", "HICP", "RRE prices", "Unemployment", "CRE prices"], label="Select Sheet for File 1 and 2")
395
-
396
- with gr.Column():
397
- result = gr.Image(label="Comparison pLot")
398
-
399
- def update_sheets(file):
400
- return get_sheet_names(file)
401
-
402
-
403
- b1 = gr.Button("Compare Data")
404
- b2 = gr.Button("Extract text information")
405
-
406
- with gr.Row():
407
  with gr.Column():
408
- sentiment_results_pdf1 = gr.HighlightedText(label="Sentiment Analysis - PDF 1")
409
  country_1_dropdown = gr.Dropdown(label="Select Country from Excel File 1")
410
- summarize_btn1_country = gr.Button("Summary for the selected country")
411
- text_result_df1 = gr.Textbox(label="Sentence for excel file 1", lines=2)
412
- summarize_btn1_country.click(fn=lambda country, theme: generate_text(stored_df1, country, theme),
413
- inputs=[country_1_dropdown, sheet],
414
- outputs=text_result_df1)
415
- with gr.Column():
416
- sentiment_results_pdf2 = gr.HighlightedText(label="Sentiment Analysis - PDF 2")
417
  country_2_dropdown = gr.Dropdown(label="Select Country from Excel File 2")
418
- summarize_btn2_country = gr.Button("Summary for the selected country")
419
- text_result_df2 = gr.Textbox(label="Sentence for excel file 2", lines=2)
420
- summarize_btn2_country.click(fn=lambda country, theme: generate_text(stored_df2, country, theme),
421
- inputs=[country_2_dropdown, sheet],
422
- outputs=text_result_df2)
423
-
424
- # Button to extract text from PDFs and perform sentiment analysis
425
- b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=[result,country_1_dropdown, country_2_dropdown])
426
- b2.click(fn=process_pdfs_and_analyze_sentiment, inputs=[file1, file2, sheet], outputs=[sentiment_results_pdf1, sentiment_results_pdf2])
427
 
 
 
 
 
 
 
 
428
 
429
  demo.launch()
 
302
  stored_df2 = []
303
 
304
  with gr.Blocks() as demo:
305
+ with gr.Tab("Methodology"):
306
  gr.Markdown("""
307
  ## Macro-economy Adverse Scenario Comparison from EBA Reports
308
 
 
322
  - Select two Excel files and a sheet name.
323
  - For the two selected tables, compute the difference of the cumulative adverse growth rate over their respective three years for the selected sheet name (topic).
324
  - For the selected topic (sheet name), find related sentences in the associated PDF text that mention the topic, and classify them by sentiment.
325
+ - For a selected country and topic, describe the adverse growth rate trend over three years using the [**google/flan-t5-base**](https://huggingface.co/google/flan-t5-base).
326
  """)
327
  with gr.Tab("Financial Report Text Analysis"):
328
+ gr.Markdown("## Paragraph Extraction and Analysis on Adverse Macro-Economy Scenarios")
329
 
330
  with gr.Row():
 
331
  with gr.Column():
332
+ gr.Markdown("### Step 1: Upload and Extract Paragraphs")
333
  pdf1 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 1")
334
  pdf2 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 2")
335
+ extract_button = gr.Button("Extract Paragraphs")
336
+
337
  with gr.Column():
338
+ gr.Markdown("### Step 2: Select Paragraphs for Analysis")
339
  paragraph_1_dropdown = gr.Dropdown(label="Select Paragraph from PDF 1")
340
  paragraph_2_dropdown = gr.Dropdown(label="Select Paragraph from PDF 2")
341
 
342
  def update_paragraphs(pdf1, pdf2):
 
343
  stored_paragraphs_1, stored_paragraphs_2 = extract_and_paragraph(pdf1, pdf2, True)
344
+ return [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_1)], \
345
+ [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_2)]
 
346
 
347
+ extract_button.click(update_paragraphs, inputs=[pdf1, pdf2], outputs=[paragraph_1_dropdown, paragraph_2_dropdown])
348
 
349
  with gr.Row():
 
350
  with gr.Column():
351
  gr.Markdown("### PDF 1 Analysis")
 
352
  summarize_btn1 = gr.Button("Summarize Text from PDF 1")
353
  summary_textbox_1 = gr.Textbox(label="Summary for PDF 1", lines=2)
 
354
  sentiment_btn1 = gr.Button("Classify Financial Tone from PDF 1")
355
+ sentiment_textbox_1 = gr.Textbox(label="Tone Classification for PDF 1", lines=1)
356
+
357
+ summarize_btn1.click(process_paragraph_1_sum, inputs=paragraph_1_dropdown, outputs=summary_textbox_1)
358
+ sentiment_btn1.click(process_paragraph_1_sent, inputs=paragraph_1_dropdown, outputs=sentiment_textbox_1)
359
+
 
 
 
 
 
360
  with gr.Column():
361
  gr.Markdown("### PDF 2 Analysis")
 
 
362
  summarize_btn2 = gr.Button("Summarize Text from PDF 2")
363
  summary_textbox_2 = gr.Textbox(label="Summary for PDF 2", lines=2)
 
364
  sentiment_btn2 = gr.Button("Classify Financial Tone from PDF 2")
365
+ sentiment_textbox_2 = gr.Textbox(label="Tone Classification for PDF 2", lines=1)
366
+
367
+ summarize_btn2.click(process_paragraph_2_sum, inputs=paragraph_2_dropdown, outputs=summary_textbox_2)
368
+ sentiment_btn2.click(process_paragraph_2_sent, inputs=paragraph_2_dropdown, outputs=sentiment_textbox_2)
 
 
 
 
369
 
370
  with gr.Tab("Financial Report Table Analysis"):
371
+ gr.Markdown("## Excel Data Comparison and Topic Analysis")
 
372
 
373
  with gr.Row():
374
  with gr.Column():
375
+ gr.Markdown("### Step 1: Upload Excel Files")
376
  file1 = gr.Dropdown(choices=get_excel_files(PDF_FOLDER), label="Select Excel File 1")
377
  file2 = gr.Dropdown(choices=get_excel_files(PDF_FOLDER), label="Select Excel File 2")
378
+ sheet = gr.Dropdown(choices=["GDP", "HICP", "RRE prices", "Unemployment", "CRE prices"], label="Select Sheet")
379
+
 
 
 
 
 
 
 
 
 
 
 
380
  with gr.Column():
381
+ gr.Markdown("### Step 2: Select a Country for Adverse Growth Analysis")
382
  country_1_dropdown = gr.Dropdown(label="Select Country from Excel File 1")
 
 
 
 
 
 
 
383
  country_2_dropdown = gr.Dropdown(label="Select Country from Excel File 2")
 
 
 
 
 
 
 
 
 
384
 
385
+ with gr.Row():
386
+ gr.Markdown("### Step 3: Compare Data and Generate Reports")
387
+ comparison_button = gr.Button("Compare Data")
388
+ text_result_df1 = gr.Textbox(label="Adverse Growth Report for Excel File 1", lines=4)
389
+ text_result_df2 = gr.Textbox(label="Adverse Growth Report for Excel File 2", lines=4)
390
+
391
+ comparison_button.click(fn=process_and_compare, inputs=[file1, sheet, file2], outputs=[text_result_df1, text_result_df2])
392
 
393
  demo.launch()