File size: 9,147 Bytes
bb1b68b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
import streamlit as st
import pandas as pd
from functions import *
from dotenv import load_dotenv

load_dotenv()

def initialize_session_state():
    if 'processing_complete' not in st.session_state:
        st.session_state['processing_complete'] = False
    if 'results_df' not in st.session_state:
        st.session_state['results_df'] = None
    if 'output_choice' not in st.session_state:
        st.session_state['output_choice'] = "Download CSV"

initialize_session_state()

def main():
    st.title("InfoSynth")
    
    df = None
    
    # File upload section
    st.header("1. Upload Your Data")
    data_source = st.radio("Choose a data source:", ["CSV File", "Google Sheet"])

    if data_source == "CSV File":
        uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])
        
        if uploaded_file is not None:
            df = pd.read_csv(uploaded_file)
    else:
        st.info(
        "Before proceeding, ensure your Google Sheet is shared with the service account. "
        "You can find the service account email in your credentials.json file."
        )
        spreadsheet_id = st.text_input(
            "Enter Google Spreadsheet ID", 
            help="You can find this in the spreadsheet URL between /d/ and /edit"
        )
        
        sheet_names = None
        if spreadsheet_id:
            try:
                sheet_names = get_all_sheet_names(spreadsheet_id)
                if not sheet_names:
                    st.error("No sheets found in this spreadsheet. Please check the ID and permissions.")
            except ValueError as e:
                st.error(f"Error accessing spreadsheet: {str(e)}")
                st.info("Please check the ID and permissions.")
            except Exception as e:
                st.error(f"Error accessing spreadsheet: {str(e)}")
                sheet_names = []
        
        sheet_name = None
        if sheet_names:
            sheet_name = st.selectbox(
                "Select Sheet Name",
                options=sheet_names,
                help="The name of the specific sheet to read from"
            )
        
        if spreadsheet_id and sheet_name:
            try:
                df = load_google_sheet(spreadsheet_id, sheet_name)
                if df is None or df.empty:
                    st.error("No data found in the selected sheet.")
            except Exception as e:
                st.error(f"Error loading sheet data: {str(e)}")
                df = None

    if df is not None:
        try:
            # Display available columns for selection
            st.header("2. Select Primary Column")
            primary_column = st.selectbox(
                "Choose the main column for analysis:",
                options=df.columns.tolist()
            )
            
            # Show data preview
            st.header("3. Data Preview")
            st.write("First 5 rows of your data:")
            st.dataframe(df.head())            
            
            # Add Query Template Section
            st.header("4. Query Template")
            st.write(f"""

            Create your query template using {primary_column} as a placeholder.

            Example: "What products does {primary_column} offer?"

            """)
            
            query_template = st.text_area(
                "Enter your query template:",
                value=f"Tell me about {{{primary_column}}}",
                help=f"Use {{{primary_column}}} as a placeholder"
            )
            
            # Preview generated queries
            #if st.button("Preview Generated Queries"):
            #    st.subheader("Generated Queries Preview")
            #    # Get first 5 values from the selected column
            #    sample_values = df[primary_column].head()
            #    
            #    # Display example queries
            #    for value in sample_values:
            #        generated_query = query_template.replace(
            #            f"{{{primary_column}}}", str(value)
            #        )
            #        st.write(f"- {generated_query}")
            #    
            #    # Show total number of queries that will be generated
            #    st.info(f"Total queries to be generated: {len(df)}")
            
            # Add confirmation and processing section
            st.header("5. Process Queries")
            total_queries = len(df[primary_column])
            estimated_time = total_queries * 2  # 2 second per query due to rate limiting
            
            st.warning(f"""

            ⚠️ Please confirm:

            - Number of queries to process: {total_queries}

            - Estimated processing time: {estimated_time} seconds ({estimated_time/60:.1f} minutes)

            - This will use {total_queries} API calls

            """)
            
            # Show sample of what will be processed
            #st.subheader("Sample of data to be processed:")
            #sample_df = df[[primary_column]].head()
            #st.dataframe(sample_df)
            
            # Process button with confirmation
            if st.button("Start Processing"):
                with st.spinner("Processing queries..."):
                    # Add a progress bar
                    progress_bar = st.progress(0)
                    
                    results = []
                    llm = setup_llm()
                    for index, row in df.iterrows():
                        try:
                            value = row[primary_column]
                            
                            # Handle empty/null values
                            if pd.isna(value) or str(value).strip() == '':
                                results.append({
                                    'input_value': value,
                                    'result': 'NA'
                                })
                                continue
                            
                            query = query_template.replace(f"{{{primary_column}}}", str(value))
                            
                            # Display current processing item
                            st.text(f"Processing: {value}")
                            
                            # Process query
                            result = process_queries(pd.DataFrame([row]), primary_column, query)
                            output = process_with_ai(result, query, llm)
                            
                            results.append({
                                'input_value': value,
                                'result': output.content
                            })
                            
                            # Update progress
                            progress_bar.progress((index + 1) / total_queries)
                            
                        except Exception as e:
                            st.error(f"Error processing {value}: {str(e)}")
                            continue
                    
                    # Show completion and results
                    st.session_state['processing_complete'] = True
                    st.session_state['results_df'] = pd.DataFrame(results, columns=['input_value', 'result'])

            # Show results and save options if processing is complete
            if st.session_state['processing_complete']:
                st.success(f"✅ Completed processing {len(st.session_state['results_df'])} queries!")
                
                st.subheader("Results Preview:")
                st.dataframe(st.session_state['results_df'].head())
                
                st.header("6. Save Results")
                output_choice = st.radio("Choose an output format:", ["Download CSV", "Update Google Sheet"])

                if output_choice == "Download CSV":
                    csv = st.session_state['results_df'].to_csv(index=False)
                    if st.download_button(
                        "Download Complete Results (CSV)",
                        csv,
                        "search_results.csv",
                        "text/csv",
                        key='download-csv'
                    ):
                        st.success("✅ File downloaded successfully!")
                        
                elif output_choice == "Update Google Sheet":
                    update_button = st.button("Confirm Update to Google Sheet")
                    if update_button:
                        try:
                            write_to_google_sheet(spreadsheet_id, sheet_name, st.session_state['results_df'])
                            st.success("✅ Results successfully added as new column!")
                        except Exception as e:
                            st.error(f"Error updating sheet: {str(e)}")
        except Exception as e:
            st.error(f"Error processing the file: {str(e)}")

if __name__ == "__main__":
    main()