Spaces:
Sleeping
Sleeping
File size: 9,147 Bytes
bb1b68b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 |
import streamlit as st
import pandas as pd
from functions import *
from dotenv import load_dotenv
load_dotenv()
def initialize_session_state():
if 'processing_complete' not in st.session_state:
st.session_state['processing_complete'] = False
if 'results_df' not in st.session_state:
st.session_state['results_df'] = None
if 'output_choice' not in st.session_state:
st.session_state['output_choice'] = "Download CSV"
initialize_session_state()
def main():
st.title("InfoSynth")
df = None
# File upload section
st.header("1. Upload Your Data")
data_source = st.radio("Choose a data source:", ["CSV File", "Google Sheet"])
if data_source == "CSV File":
uploaded_file = st.file_uploader("Choose a CSV file", type=['csv'])
if uploaded_file is not None:
df = pd.read_csv(uploaded_file)
else:
st.info(
"Before proceeding, ensure your Google Sheet is shared with the service account. "
"You can find the service account email in your credentials.json file."
)
spreadsheet_id = st.text_input(
"Enter Google Spreadsheet ID",
help="You can find this in the spreadsheet URL between /d/ and /edit"
)
sheet_names = None
if spreadsheet_id:
try:
sheet_names = get_all_sheet_names(spreadsheet_id)
if not sheet_names:
st.error("No sheets found in this spreadsheet. Please check the ID and permissions.")
except ValueError as e:
st.error(f"Error accessing spreadsheet: {str(e)}")
st.info("Please check the ID and permissions.")
except Exception as e:
st.error(f"Error accessing spreadsheet: {str(e)}")
sheet_names = []
sheet_name = None
if sheet_names:
sheet_name = st.selectbox(
"Select Sheet Name",
options=sheet_names,
help="The name of the specific sheet to read from"
)
if spreadsheet_id and sheet_name:
try:
df = load_google_sheet(spreadsheet_id, sheet_name)
if df is None or df.empty:
st.error("No data found in the selected sheet.")
except Exception as e:
st.error(f"Error loading sheet data: {str(e)}")
df = None
if df is not None:
try:
# Display available columns for selection
st.header("2. Select Primary Column")
primary_column = st.selectbox(
"Choose the main column for analysis:",
options=df.columns.tolist()
)
# Show data preview
st.header("3. Data Preview")
st.write("First 5 rows of your data:")
st.dataframe(df.head())
# Add Query Template Section
st.header("4. Query Template")
st.write(f"""
Create your query template using {primary_column} as a placeholder.
Example: "What products does {primary_column} offer?"
""")
query_template = st.text_area(
"Enter your query template:",
value=f"Tell me about {{{primary_column}}}",
help=f"Use {{{primary_column}}} as a placeholder"
)
# Preview generated queries
#if st.button("Preview Generated Queries"):
# st.subheader("Generated Queries Preview")
# # Get first 5 values from the selected column
# sample_values = df[primary_column].head()
#
# # Display example queries
# for value in sample_values:
# generated_query = query_template.replace(
# f"{{{primary_column}}}", str(value)
# )
# st.write(f"- {generated_query}")
#
# # Show total number of queries that will be generated
# st.info(f"Total queries to be generated: {len(df)}")
# Add confirmation and processing section
st.header("5. Process Queries")
total_queries = len(df[primary_column])
estimated_time = total_queries * 2 # 2 second per query due to rate limiting
st.warning(f"""
⚠️ Please confirm:
- Number of queries to process: {total_queries}
- Estimated processing time: {estimated_time} seconds ({estimated_time/60:.1f} minutes)
- This will use {total_queries} API calls
""")
# Show sample of what will be processed
#st.subheader("Sample of data to be processed:")
#sample_df = df[[primary_column]].head()
#st.dataframe(sample_df)
# Process button with confirmation
if st.button("Start Processing"):
with st.spinner("Processing queries..."):
# Add a progress bar
progress_bar = st.progress(0)
results = []
llm = setup_llm()
for index, row in df.iterrows():
try:
value = row[primary_column]
# Handle empty/null values
if pd.isna(value) or str(value).strip() == '':
results.append({
'input_value': value,
'result': 'NA'
})
continue
query = query_template.replace(f"{{{primary_column}}}", str(value))
# Display current processing item
st.text(f"Processing: {value}")
# Process query
result = process_queries(pd.DataFrame([row]), primary_column, query)
output = process_with_ai(result, query, llm)
results.append({
'input_value': value,
'result': output.content
})
# Update progress
progress_bar.progress((index + 1) / total_queries)
except Exception as e:
st.error(f"Error processing {value}: {str(e)}")
continue
# Show completion and results
st.session_state['processing_complete'] = True
st.session_state['results_df'] = pd.DataFrame(results, columns=['input_value', 'result'])
# Show results and save options if processing is complete
if st.session_state['processing_complete']:
st.success(f"✅ Completed processing {len(st.session_state['results_df'])} queries!")
st.subheader("Results Preview:")
st.dataframe(st.session_state['results_df'].head())
st.header("6. Save Results")
output_choice = st.radio("Choose an output format:", ["Download CSV", "Update Google Sheet"])
if output_choice == "Download CSV":
csv = st.session_state['results_df'].to_csv(index=False)
if st.download_button(
"Download Complete Results (CSV)",
csv,
"search_results.csv",
"text/csv",
key='download-csv'
):
st.success("✅ File downloaded successfully!")
elif output_choice == "Update Google Sheet":
update_button = st.button("Confirm Update to Google Sheet")
if update_button:
try:
write_to_google_sheet(spreadsheet_id, sheet_name, st.session_state['results_df'])
st.success("✅ Results successfully added as new column!")
except Exception as e:
st.error(f"Error updating sheet: {str(e)}")
except Exception as e:
st.error(f"Error processing the file: {str(e)}")
if __name__ == "__main__":
main()
|