Spaces:

Cachoups
/

FinanceReport

Sleeping

App Files Files Community

Cachoups committed on Sep 13, 2024

Commit

22db5e3

verified ·

1 Parent(s): d2deb56

Update app.py

Browse files

Files changed (1) hide show

app.py +166 -125

app.py CHANGED Viewed

@@ -3,7 +3,11 @@ import gradio as gr
 from transformers import pipeline
 import spacy
 import lib.read_pdf
 # Initialize spaCy model
 nlp = spacy.load('en_core_web_sm')
 nlp.add_pipe('sentencizer')
@@ -70,134 +74,171 @@ def get_pdf_files(folder):
 def show(name):
     return f"{name}"
 stored_paragraphs_1 = []
 stored_paragraphs_2 = []
 with gr.Blocks() as demo:
-    gr.Markdown("## Financial Report Paragraph Selection and Analysis")
-    with gr.Row():
-        # Upload PDFs
-        with gr.Column():
-            pdf1 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 1")
-            pdf2 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 2")
-        with gr.Column():
-            b1 = gr.Button("Extract and Display Paragraphs")
-            paragraph_1_dropdown = gr.Dropdown(label="Select Paragraph from PDF 1")
-            paragraph_2_dropdown = gr.Dropdown(label="Select Paragraph from PDF 2")
-            def update_paragraphs(pdf1, pdf2):
-                global stored_paragraphs_1, stored_paragraphs_2
-                stored_paragraphs_1, stored_paragraphs_2 = extract_and_summarize(pdf1, pdf2)
-                updated_dropdown_1 = [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_1)]
-                updated_dropdown_2 = [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_2)]
-                return gr.update(choices=updated_dropdown_1), gr.update(choices=updated_dropdown_2)
-            b1.click(fn=update_paragraphs, inputs=[pdf1, pdf2], outputs=[paragraph_1_dropdown, paragraph_2_dropdown])
-    with gr.Row():
-        # Process the selected paragraph from PDF 1
-        with gr.Column():
-            gr.Markdown("### PDF 1 Analysis")
-            def process_paragraph_1_sum(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_1[paragraph_index]
-                    summary = summarize_text(selected_paragraph)
-                    return summary
-                except (IndexError, ValueError):
-                    return "Error"
-            def process_paragraph_1_sent(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_1[paragraph_index]
-                    sentiment = text_to_sentiment(selected_paragraph)
-                    return sentiment
-                except (IndexError, ValueError):
-                    return "Error"
-            def process_paragraph_1_sent_tone(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_1[paragraph_index]
-                    fin_spans = fin_ext(selected_paragraph)
-                    return fin_spans
-                except (IndexError, ValueError):
-                    return []
-            def process_paragraph_1_sent_tone_bis(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_1[paragraph_index]
-                    fin_spans = fin_ext_bis(selected_paragraph)
-                    return fin_spans
-                except (IndexError, ValueError):
-                    return []
-            selected_paragraph_1 = gr.Textbox(label="Selected Paragraph 1 Content", lines=4)
-            selected_paragraph_1.change(show, paragraph_1_dropdown, selected_paragraph_1)
-            summarize_btn1 = gr.Button("Summarize Text from PDF 1")
-            summary_textbox_1 = gr.Textbox(label="Summary for PDF 1", lines=2)
-            summarize_btn1.click(fn=lambda p: process_paragraph_1_sum(p), inputs=paragraph_1_dropdown, outputs=summary_textbox_1)
-            sentiment_btn1 = gr.Button("Classify Financial Tone from PDF 1")
-            sentiment_textbox_1 = gr.Textbox(label="Classification for PDF 1", lines=1)
-            sentiment_btn1.click(fn=lambda p: process_paragraph_1_sent(p), inputs=paragraph_1_dropdown, outputs=sentiment_textbox_1)
-            analyze_btn1 = gr.Button("Analyze Financial Tone on each sentence with yiyanghkust/finbert-tone")
-            fin_spans_1 = gr.HighlightedText(label="Financial Tone Analysis for PDF 1")
-            analyze_btn1.click(fn=lambda p: process_paragraph_1_sent_tone(p), inputs=paragraph_1_dropdown, outputs=fin_spans_1)
-            analyze_btn1_ = gr.Button("Analyze Financial Tone on each sentence with ProsusAI/finbert")
-            fin_spans_1_ = gr.HighlightedText(label="Financial Tone Analysis for PDF 1 bis")
-            analyze_btn1_.click(fn=lambda p: process_paragraph_1_sent_tone_bis(p), inputs=paragraph_1_dropdown, outputs=fin_spans_1_)
-        # Process the selected paragraph from PDF 2
-        with gr.Column():
-            gr.Markdown("### PDF 2 Analysis")
-            def process_paragraph_2_sum(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_2[paragraph_index]
-                    summary = summarize_text(selected_paragraph)
-                    return summary
-                except (IndexError, ValueError):
-                    return "Error"
-            def process_paragraph_2_sent(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_2[paragraph_index]
-                    sentiment = text_to_sentiment(selected_paragraph)
-                    return sentiment
-                except (IndexError, ValueError):
-                    return "Error"
-            def process_paragraph_2_sent_tone(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_2[paragraph_index]
-                    fin_spans = fin_ext(selected_paragraph)
-                    return fin_spans
-                except (IndexError, ValueError):
-                    return []
-            def process_paragraph_2_sent_tone_bis(paragraph):
-                try:
-                    paragraph_index = int(paragraph.split(':')[0].replace('Paragraph ', '')) - 1
-                    selected_paragraph = stored_paragraphs_2[paragraph_index]
-                    fin_spans = fin_ext_bis(selected_paragraph)
-                    return fin_spans
-                except (IndexError, ValueError):
-                    return []
-            selected_paragraph_2 = gr.Textbox(label="Selected Paragraph 2 Content", lines=4)
-            selected_paragraph_2.change(show, paragraph_2_dropdown, selected_paragraph_2)
-            summarize_btn2 = gr.Button("Summarize Text from PDF 2")
-            summary_textbox_2 = gr.Textbox(label="Summary for PDF 2", lines=2)
-            summarize_btn2.click(fn=lambda p: process_paragraph_2_sum(p), inputs=paragraph_2_dropdown, outputs=summary_textbox_2)
-            sentiment_btn2 = gr.Button("Classify Financial Tone from PDF 2")
-            sentiment_textbox_2 = gr.Textbox(label="Classification for PDF 2", lines=1)
-            sentiment_btn2.click(fn=lambda p: process_paragraph_2_sent(p), inputs=paragraph_2_dropdown, outputs=sentiment_textbox_2)
-            analyze_btn2 = gr.Button("Analyze Financial Tone on each sentence with yiyanghkust/finbert-tone")
-            fin_spans_2 = gr.HighlightedText(label="Financial Tone Analysis for PDF 2")
-            analyze_btn2.click(fn=lambda p: process_paragraph_2_sent_tone(p), inputs=paragraph_2_dropdown, outputs=fin_spans_2)
-            analyze_btn2_ = gr.Button("Analyze Financial Tone on each sentence with ProsusAI/finbert")
-            fin_spans_2_ = gr.HighlightedText(label="Financial Tone Analysis for PDF 2 bis")
-            analyze_btn2_.click(fn=lambda p: process_paragraph_2_sent_tone_bis(p), inputs=paragraph_2_dropdown, outputs=fin_spans_2_)
 demo.launch()

 from transformers import pipeline
 import spacy
 import lib.read_pdf
+import pandas as pd
+import re
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import io
 # Initialize spaCy model
 nlp = spacy.load('en_core_web_sm')
 nlp.add_pipe('sentencizer')
 def show(name):
     return f"{name}"
+def get_excel_files(folder):
+    """Return the names of the .xlsx files directly inside *folder*, sorted.
+
+    Sorting keeps the dropdown choices in a stable order, because
+    os.listdir makes no guarantee about the order of the entries it returns.
+    """
+    return sorted(f for f in os.listdir(folder) if f.endswith('.xlsx'))
+def get_sheet_names(file):
+    """Return the sheet names of the workbook *file* located in DATA_FOLDER."""
+    # Use the workbook as a context manager so its file handle is closed
+    # as soon as the names have been read.
+    with pd.ExcelFile(os.path.join(DATA_FOLDER, file)) as xls:
+        return xls.sheet_names
+def process_and_compare(file1, sheet1, file2, sheet2):
+    """Plot the per-country change in 'Adverse Cumulative' between two workbooks.
+
+    Each workbook is read from the "data" folder, its sheet is renamed to a
+    fixed set of columns derived from the 4-digit year in the file name, and
+    the two tables are inner-joined on 'Country'. The bar chart shows, for
+    every country, the file2 value minus the file1 value.
+
+    Args:
+        file1: File name of the first workbook; must contain a 4-digit year.
+        sheet1: Sheet to read from file1 (also used in the plot title).
+        file2: File name of the second workbook; must contain a 4-digit year.
+        sheet2: Sheet to read from file2.
+
+    Returns:
+        The rendered chart as PNG-encoded bytes.
+
+    Raises:
+        ValueError: If a file name contains no 4-digit year, or a sheet does
+            not have the expected number of columns.
+    """
+    def extract_year(file_name):
+        # A missing selection or a name without a year would otherwise surface
+        # as an opaque AttributeError/TypeError from the regex call.
+        match = re.search(r'(\d{4})', file_name or '')
+        if match is None:
+            raise ValueError(f"No 4-digit year found in file name: {file_name!r}")
+        return int(match.group(1))
+    def process_file(file_path, sheet_name, year):
+        # Load the Excel file; the first column becomes the index and is
+        # discarded again by reset_index(drop=True) below.
+        df = pd.read_excel(os.path.join("data", file_path), sheet_name=sheet_name, index_col=0)
+        # Define expected columns based on extracted year
+        historical_col = f'Historical {year - 1}'
+        baseline_cols = [f'Baseline {year}', f'Baseline {year + 1}', f'Baseline {year + 2}']
+        adverse_cols = [f'Adverse {year}', f'Adverse {year + 1}', f'Adverse {year + 2}']
+        level_deviation_col = f'Level Deviation {year + 2}'
+        # Drop the first four rows and reset the index
+        df = df.iloc[4:].reset_index(drop=True)
+        new_columns = ['Country', 'Code', historical_col] + baseline_cols + adverse_cols + ['Adverse Cumulative', 'Adverse Minimum', level_deviation_col]
+        # Renaming is only safe when the column counts match
+        if len(df.columns) != len(new_columns):
+            raise ValueError(f"Expected {len(new_columns)} columns, but found {len(df.columns)} columns in the data.")
+        df.columns = new_columns
+        return df
+    year1 = extract_year(file1)
+    year2 = extract_year(file2)
+    df1 = process_file(file1, sheet1, year1)
+    df2 = process_file(file2, sheet2, year2)
+    # Suffixes are applied to (left, right) in that order. Naming them after
+    # the frame they belong to keeps the subtraction below unambiguous and
+    # also works when both files carry the same year.
+    merged_df = pd.merge(df2, df1, on='Country', suffixes=('_file2', '_file1'))
+    diff_col = 'Difference adverse cumulative growth'
+    # Coerce before subtracting so non-numeric cells become NaN instead of
+    # raising during the arithmetic.
+    merged_df[diff_col] = (
+        pd.to_numeric(merged_df['Adverse Cumulative_file2'], errors='coerce')
+        - pd.to_numeric(merged_df['Adverse Cumulative_file1'], errors='coerce')
+    )
+    merged_df['Country'] = merged_df['Country'].astype(str)
+    countries = merged_df['Country']
+    # Cycle through the palette so bars and legend entries stay paired even
+    # when there are more countries than colours in 'tab20'.
+    palette = plt.get_cmap('tab20').colors
+    bar_colors = [palette[i % len(palette)] for i in range(len(countries))]
+    fig, ax = plt.subplots(figsize=(12, 8))
+    try:
+        ax.bar(countries, merged_df[diff_col], color=bar_colors)
+        handles = [patches.Patch(color=color, label=country) for color, country in zip(bar_colors, countries)]
+        ax.legend(handles=handles, title='Countries', bbox_to_anchor=(1.05, 1), loc='upper left')
+        ax.set_title(f'Histogram of Difference between Adverse cumulative growth of {year2} and {year1} for {sheet1}')
+        ax.set_xlabel('Country')
+        ax.set_ylabel('Difference')
+        ax.tick_params(axis='x', labelrotation=90)
+        # NOTE(review): the caller feeds this value to a gr.Image output;
+        # confirm that component accepts raw PNG bytes (it may expect a numpy
+        # array, a PIL image or a file path instead).
+        with io.BytesIO() as buf:
+            fig.savefig(buf, format='png', bbox_inches='tight')
+            return buf.getvalue()
+    finally:
+        # pyplot keeps every figure alive until it is closed explicitly;
+        # without this each click would leak one figure.
+        plt.close(fig)
 stored_paragraphs_1 = []
 stored_paragraphs_2 = []
 with gr.Blocks() as demo:
+    # Tab 1: pick two PDFs, extract their paragraphs, then summarise or
+    # classify the selected paragraph of each document side by side.
+    with gr.Tab("Financial Report Text Analysis"):
+        gr.Markdown("## Financial Report Paragraph Selection and Analysis on adverse macro-economy scenario")
+        with gr.Row():
+            # Upload PDFs
+            with gr.Column():
+                pdf1 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 1")
+                pdf2 = gr.Dropdown(choices=get_pdf_files(PDF_FOLDER), label="Select PDF 2")
+            with gr.Column():
+                b1 = gr.Button("Extract and Display Paragraphs")
+                paragraph_1_dropdown = gr.Dropdown(label="Select Paragraph from PDF 1")
+                paragraph_2_dropdown = gr.Dropdown(label="Select Paragraph from PDF 2")
+                def update_paragraphs(pdf1, pdf2):
+                    """Extract both PDFs and refresh the two paragraph dropdowns."""
+                    # The paragraphs are kept in module-level lists so they can
+                    # be looked up later from the selected dropdown entry.
+                    global stored_paragraphs_1, stored_paragraphs_2
+                    stored_paragraphs_1, stored_paragraphs_2 = extract_and_summarize(pdf1, pdf2)
+                    # Each choice is "Paragraph <n>: <first 100 characters>..."
+                    updated_dropdown_1 = [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_1)]
+                    updated_dropdown_2 = [f"Paragraph {i+1}: {p[:100]}..." for i, p in enumerate(stored_paragraphs_2)]
+                    return gr.update(choices=updated_dropdown_1), gr.update(choices=updated_dropdown_2)
+                b1.click(fn=update_paragraphs, inputs=[pdf1, pdf2], outputs=[paragraph_1_dropdown, paragraph_2_dropdown])
+        with gr.Row():
+            # Process the selected paragraph from PDF 1
+            with gr.Column():
+                gr.Markdown("### PDF 1 Analysis")
+                selected_paragraph_1 = gr.Textbox(label="Selected Paragraph 1 Content", lines=4)
+                # Listen on the dropdown (the component the user changes) so a
+                # new selection is mirrored into the textbox.
+                paragraph_1_dropdown.change(show, paragraph_1_dropdown, selected_paragraph_1)
+                summarize_btn1 = gr.Button("Summarize Text from PDF 1")
+                summary_textbox_1 = gr.Textbox(label="Summary for PDF 1", lines=2)
+                summarize_btn1.click(fn=lambda p: process_paragraph_1_sum(p), inputs=paragraph_1_dropdown, outputs=summary_textbox_1)
+                sentiment_btn1 = gr.Button("Classify Financial Tone from PDF 1")
+                sentiment_textbox_1 = gr.Textbox(label="Classification for PDF 1", lines=1)
+                sentiment_btn1.click(fn=lambda p: process_paragraph_1_sent(p), inputs=paragraph_1_dropdown, outputs=sentiment_textbox_1)
+                analyze_btn1 = gr.Button("Analyze Financial Tone on each sentence with yiyanghkust/finbert-tone")
+                fin_spans_1 = gr.HighlightedText(label="Financial Tone Analysis for PDF 1")
+                analyze_btn1.click(fn=lambda p: process_paragraph_1_sent_tone(p), inputs=paragraph_1_dropdown, outputs=fin_spans_1)
+                analyze_btn1_ = gr.Button("Analyze Financial Tone on each sentence with ProsusAI/finbert")
+                fin_spans_1_ = gr.HighlightedText(label="Financial Tone Analysis for PDF 1 bis")
+                analyze_btn1_.click(fn=lambda p: process_paragraph_1_sent_tone_bis(p), inputs=paragraph_1_dropdown, outputs=fin_spans_1_)
+            # Process the selected paragraph from PDF 2
+            with gr.Column():
+                gr.Markdown("### PDF 2 Analysis")
+                selected_paragraph_2 = gr.Textbox(label="Selected Paragraph 2 Content", lines=4)
+                # Same wiring as for PDF 1: the dropdown drives the textbox.
+                paragraph_2_dropdown.change(show, paragraph_2_dropdown, selected_paragraph_2)
+                summarize_btn2 = gr.Button("Summarize Text from PDF 2")
+                summary_textbox_2 = gr.Textbox(label="Summary for PDF 2", lines=2)
+                summarize_btn2.click(fn=lambda p: process_paragraph_2_sum(p), inputs=paragraph_2_dropdown, outputs=summary_textbox_2)
+                sentiment_btn2 = gr.Button("Classify Financial Tone from PDF 2")
+                sentiment_textbox_2 = gr.Textbox(label="Classification for PDF 2", lines=1)
+                sentiment_btn2.click(fn=lambda p: process_paragraph_2_sent(p), inputs=paragraph_2_dropdown, outputs=sentiment_textbox_2)
+                analyze_btn2 = gr.Button("Analyze Financial Tone on each sentence with yiyanghkust/finbert-tone")
+                fin_spans_2 = gr.HighlightedText(label="Financial Tone Analysis for PDF 2")
+                analyze_btn2.click(fn=lambda p: process_paragraph_2_sent_tone(p), inputs=paragraph_2_dropdown, outputs=fin_spans_2)
+                analyze_btn2_ = gr.Button("Analyze Financial Tone on each sentence with ProsusAI/finbert")
+                fin_spans_2_ = gr.HighlightedText(label="Financial Tone Analysis for PDF 2 bis")
+                analyze_btn2_.click(fn=lambda p: process_paragraph_2_sent_tone_bis(p), inputs=paragraph_2_dropdown, outputs=fin_spans_2_)
+    # Tab 2: pick two Excel workbooks and one sheet name, then plot the
+    # comparison produced by process_and_compare.
+    with gr.Tab("Financial Report Table Analysis"):
+        gr.Markdown("## Excel Data Comparison")
+        with gr.Row():
+            with gr.Column():
+                file1 = gr.Dropdown(choices=get_excel_files(DATA_FOLDER), label="Select Excel File 1")
+                file2 = gr.Dropdown(choices=get_excel_files(DATA_FOLDER), label="Select Excel File 2")
+                sheet = gr.Dropdown(choices=[], label="Select Sheet for File 1 and 2")
+            with gr.Column():
+                result = gr.Image(label="Comparison Plot")
+        def update_sheets(file):
+            """Return a dropdown update listing the sheets of the chosen file."""
+            # A cleared selection has no workbook to open.
+            if not file:
+                return gr.update(choices=[])
+            # Returning a bare list would set the dropdown's value; the
+            # choices have to be replaced through gr.update, as
+            # update_paragraphs does for the paragraph dropdowns.
+            return gr.update(choices=get_sheet_names(file))
+        file1.change(fn=update_sheets, inputs=file1, outputs=sheet)
+        file2.change(fn=update_sheets, inputs=file2, outputs=sheet)
+        # The single sheet dropdown supplies the sheet name for both files.
+        b1 = gr.Button("Compare Data")
+        b1.click(fn=process_and_compare, inputs=[file1, sheet, file2, sheet], outputs=result)
 demo.launch()