Spaces:

Umang-Bansal
/

Infosynth

Sleeping

File size: 9,147 Bytes

bb1b68b

import streamlit as st
import pandas as pd
from functions import *
from dotenv import load_dotenv

load_dotenv()

def initialize_session_state():
    if 'processing_complete' not in st.session_state:
        st.session_state['processing_complete'] = False
    if 'results_df' not in st.session_state:
        st.session_state['results_df'] = None
    if 'output_choice' not in st.session_state:
        st.session_state['output_choice'] = "Download CSV"

initialize_session_state()

def main():
    st.title("InfoSynth")
    
    df = None
    
    # File upload section
    st.header("1. Upload Your Data")
    data_source = st.radio("Choose a data source:", ["CSV File", "Google Sheet"])

    if data_source == "CSV File":
        uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])
        
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
    else:
        st.info(
        "Before proceeding, ensure your Google Sheet is shared with the service account. "
        "You can find the service account email in your credentials.json file."
        )
        spreadsheet_id = st.text_input(
            "Enter Google Spreadsheet ID", 
            help="You can find this in the spreadsheet URL between /d/ and /edit"
        )
        
        sheet_names = None
        if spreadsheet_id:
            try:
                sheet_names = get_all_sheet_names(spreadsheet_id)
                if not sheet_names:
                    st.error("No sheets found in this spreadsheet. Please check the ID and permissions.")
            except ValueError as e:
                st.error(f"Error accessing spreadsheet: {str(e)}")
                st.info("Please check the ID and permissions.")
            except Exception as e:
                st.error(f"Error accessing spreadsheet: {str(e)}")
                sheet_names = []
        
        sheet_name = None
        if sheet_names:
            sheet_name = st.selectbox(
                "Select Sheet Name",
                options=sheet_names,
                help="The name of the specific sheet to read from"
            )
        
        if spreadsheet_id and sheet_name:
            try:
                df = load_google_sheet(spreadsheet_id, sheet_name)
                if df is None or df.empty:
                    st.error("No data found in the selected sheet.")
            except Exception as e:
                st.error(f"Error loading sheet data: {str(e)}")
                df = None

    if df is not None:
        try:
            # Display available columns for selection
            st.header("2. Select Primary Column")
            primary_column = st.selectbox(
                "Choose the main column for analysis:",
                options=df.columns.tolist()
            )
            
            # Show data preview
            st.header("3. Data Preview")
            st.write("First 5 rows of your data:")
            st.dataframe(df.head())            
            
            # Add Query Template Section
            st.header("4. Query Template")
            st.write(f"""

            Create your query template using {primary_column} as a placeholder.

            Example: "What products does {primary_column} offer?"

            """)
            
            query_template = st.text_area(
                "Enter your query template:",
                value=f"Tell me about {{{primary_column}}}",
                help=f"Use {{{primary_column}}} as a placeholder"
            )
            
            # Preview generated queries
            #if st.button("Preview Generated Queries"):
            #    st.subheader("Generated Queries Preview")
            #    # Get first 5 values from the selected column
            #    sample_values = df[primary_column].head()
            #    
            #    # Display example queries
            #    for value in sample_values:
            #        generated_query = query_template.replace(
            #            f"{{{primary_column}}}", str(value)
            #        )
            #        st.write(f"- {generated_query}")
            #    
            #    # Show total number of queries that will be generated
            #    st.info(f"Total queries to be generated: {len(df)}")
            
            # Add confirmation and processing section
            st.header("5. Process Queries")
            total_queries = len(df[primary_column])
            estimated_time = total_queries * 2  # 2 second per query due to rate limiting
            
            st.warning(f"""

            ⚠️ Please confirm:

            - Number of queries to process: {total_queries}

            - Estimated processing time: {estimated_time} seconds ({estimated_time/60:.1f} minutes)

            - This will use {total_queries} API calls

            """)
            
            # Show sample of what will be processed
            #st.subheader("Sample of data to be processed:")
            #sample_df = df[[primary_column]].head()
            #st.dataframe(sample_df)
            
            # Process button with confirmation
            if st.button("Start Processing"):
                with st.spinner("Processing queries..."):
                    # Add a progress bar
                    progress_bar = st.progress(0)
                    
                    results = []
                    llm = setup_llm()
                    for index, row in df.iterrows():
                        try:
                            value = row[primary_column]
                            
                            # Handle empty/null values
                            if pd.isna(value) or str(value).strip() == '':
                                results.append({
                                    'input_value': value,
                                    'result': 'NA'
                                })
                                continue
                            
                            query = query_template.replace(f"{{{primary_column}}}", str(value))
                            
                            # Display current processing item
                            st.text(f"Processing: {value}")
                            
                            # Process query
                            result = process_queries(pd.DataFrame([row]), primary_column, query)
                            output = process_with_ai(result, query, llm)
                            
                            results.append({
                                'input_value': value,
                                'result': output.content
                            })
                            
                            # Update progress
                            progress_bar.progress((index + 1) / total_queries)
                            
                        except Exception as e:
                            st.error(f"Error processing {value}: {str(e)}")
                            continue
                    
                    # Show completion and results
                    st.session_state['processing_complete'] = True
                    st.session_state['results_df'] = pd.DataFrame(results, columns=['input_value', 'result'])

            # Show results and save options if processing is complete
            if st.session_state['processing_complete']:
                st.success(f"✅ Completed processing {len(st.session_state['results_df'])} queries!")
                
                st.subheader("Results Preview:")
                st.dataframe(st.session_state['results_df'].head())
                
                st.header("6. Save Results")
                output_choice = st.radio("Choose an output format:", ["Download CSV", "Update Google Sheet"])

                if output_choice == "Download CSV":
                    csv = st.session_state['results_df'].to_csv(index=False)
                    if st.download_button(
                        "Download Complete Results (CSV)",
                        csv,
                        "search_results.csv",
                        "text/csv",
                        key='download-csv'
                    ):
                        st.success("✅ File downloaded successfully!")
                        
                elif output_choice == "Update Google Sheet":
                    update_button = st.button("Confirm Update to Google Sheet")
                    if update_button:
                        try:
                            write_to_google_sheet(spreadsheet_id, sheet_name, st.session_state['results_df'])
                            st.success("✅ Results successfully added as new column!")
                        except Exception as e:
                            st.error(f"Error updating sheet: {str(e)}")
        except Exception as e:
            st.error(f"Error processing the file: {str(e)}")

if __name__ == "__main__":
    main()