"""Streamlit app: upload a CSV/Excel file, chart it, and ask Groq models about it."""

import os
import subprocess
import tempfile

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from groq import Groq

# Groq API key setup.
# SECURITY: the key must come from the environment, never from source control.
# NOTE(review): a key was previously committed in this file; it should be
# revoked and rotated in the Groq console.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None

_MISSING_KEY_MESSAGE = "GROQ_API_KEY environment variable is not set."


def chat_with_groq(prompt):
    """Send *prompt* to the chat model and return the reply text.

    The system message instructs the model to process the data itself
    instead of generating code.

    Returns:
        The model's reply, or a string starting with
        "Error fetching response:" if the request could not be made.
    """
    if client is None:
        return f"Error fetching response: {_MISSING_KEY_MESSAGE}"
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "system",
                    "content": "[INSTRUCTIONS DO NOT GENERATE CODE BUT DO THE PROCCESING YOURSELF]",
                },
                {"role": "user", "content": prompt},
            ],
            model="llama3-8b-8192",
            stream=False,
        )
        return chat_completion.choices[0].message.content
    except Exception as e:  # UI boundary: surface the failure as text.
        return f"Error fetching response: {e}"


def generate_code_with_groq(prompt):
    """Ask the code model for Python code answering *prompt*.

    The assistant turn is pre-filled with an opening ```python fence and
    generation stops at the next fence, so the reply is the code body only.

    Returns:
        The generated code, or a string starting with
        "Error fetching response:" if the request could not be made.
    """
    if client is None:
        return f"Error fetching response: {_MISSING_KEY_MESSAGE}"
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "user", "content": prompt},
                {"role": "assistant", "content": "```python"},
            ],
            model="gemma-7b-it",
            stream=False,
            stop="```",
        )
        return chat_completion.choices[0].message.content
    except Exception as e:  # UI boundary: surface the failure as text.
        return f"Error fetching response: {e}"


# File Parsing Functions
def parse_file(uploaded_file):
    """Load an uploaded CSV or Excel file into a DataFrame.

    The extension check is case-insensitive, so "DATA.CSV" is accepted.

    Returns:
        The parsed DataFrame, or None (after showing an error) when the
        extension is neither .csv nor .xlsx.
    """
    filename = uploaded_file.name.lower()
    if filename.endswith(".csv"):
        return pd.read_csv(uploaded_file)
    if filename.endswith(".xlsx"):
        return pd.read_excel(uploaded_file)
    st.error("Unsupported file type! Only CSV and Excel are supported.")
    return None


# Preprocess DataFrame to Fix Type Issues
def preprocess_dataframe(df):
    """Return a copy of *df* with object/category columns converted to str.

    The conversion avoids Arrow serialization issues when the frame is
    displayed. The input frame is left untouched.

    Returns:
        The converted copy, or None (after showing an error) on failure.
    """
    try:
        converted = df.copy()
        for col in converted.columns:
            if converted[col].dtype.name in ("object", "category"):
                converted[col] = converted[col].astype(str)
        return converted
    except Exception as e:
        st.error(f"Error preprocessing data: {e}")
        return None


def _render_chart(draw, x_label, y_label):
    """Draw onto a fresh 8x6 figure via draw(ax), label it, and display it.

    The figure is always closed afterwards; Streamlit reruns the script on
    every interaction and pyplot would otherwise keep each figure alive.
    """
    fig, ax = plt.subplots(figsize=(8, 6))
    try:
        draw(ax)
        ax.set_xlabel(x_label)
        ax.set_ylabel(y_label)
        st.pyplot(fig)
    finally:
        plt.close(fig)


# Analysis Function
def analyze_data(data, visualization_type):
    """Show the data's shape, render the chosen chart, and build a Groq prompt.

    Args:
        data: DataFrame to analyze.
        visualization_type: "Bar Chart", "Line Graph" or "Area Chart".

    Returns:
        The prompt produced by generate_groq_prompt for this data and
        visualization type.
    """
    st.subheader("Basic Analysis")
    st.write("Shape of Data:", data.shape)

    numeric_data = data.select_dtypes(include=[np.number])

    if numeric_data.empty:
        st.warning(
            "The database provided has no numerical data, so it isnt availble for visualisation. But you can chat with it"
        )
    elif visualization_type == "Bar Chart":
        st.subheader("Bar Chart")
        x_col = st.selectbox(
            "Select the X-axis column for the Bar Chart (Non-Numeric):", data.columns
        )
        # Only numeric columns can be summed and plotted as bar heights.
        y_col = st.selectbox(
            "Select the Y-axis column for the Bar Chart (Numeric):", numeric_data.columns
        )
        _render_chart(
            lambda ax: data.groupby(x_col)[y_col].sum().plot(kind="bar", ax=ax),
            x_col,
            y_col,
        )
    elif visualization_type == "Line Graph":
        st.subheader("Line Graph")
        x_col = st.selectbox(
            "Select the X-axis column for the Line Graph (Numeric):", numeric_data.columns
        )
        y_col = st.selectbox(
            "Select the Y-axis column for the Line Graph (Numeric):", numeric_data.columns
        )
        _render_chart(lambda ax: ax.plot(data[x_col], data[y_col]), x_col, y_col)
    elif visualization_type == "Area Chart":
        st.subheader("Area Chart")
        column = st.selectbox("Select a column for the Area Chart:", numeric_data.columns)
        _render_chart(lambda ax: data[column].plot(kind="area", ax=ax), column, "Area")
    else:
        st.warning(f"Unsupported visualization type: {visualization_type}")

    # Automatically generate a prompt for Groq based on the analysis
    return generate_groq_prompt(data, visualization_type)


# Function to generate a prompt based on the data analysis
def generate_groq_prompt(data, visualization_type):
    """Build the code-generation prompt for *data* and *visualization_type*.

    The whole table (without the index) is embedded in the prompt so that
    the generated code can inline the data instead of reading a file.
    """
    # Convert DataFrame to a string without the index
    data_without_index = data.to_string(index=False)
    return (
        "\n"
        "Here is the summary statistics for the dataset:\n"
        f"{data_without_index}\n"
        "\n"
        f"The user has selected the '{visualization_type}' visualization type.\n"
        "Please generate Python code that does this and for any data, "
        "please don't use any file input. Write the data in the code.\n"
    )


# Streamlit App
def main():
    """Run the Streamlit page: upload, preview, chart, chat, and code generation."""
    st.title("Data Analysis AI")
    st.markdown("Upload a file (CSV or Excel) to analyze it.")

    if client is None:
        st.warning(f"{_MISSING_KEY_MESSAGE} Chat and code generation are unavailable.")

    uploaded_file = st.file_uploader("Choose a file", type=["csv", "xlsx"])
    if uploaded_file is None:
        return

    try:
        data = parse_file(uploaded_file)
        if data is not None:
            data = preprocess_dataframe(data)  # Fix serialization issues
        if data is None:
            return

        st.subheader("Uploaded Data")
        st.write(data)  # Display the full dataset without truncation

        # Visualization Selection
        visualization_type = st.selectbox(
            "Select a visualization type:",
            ["Bar Chart", "Line Graph", "Area Chart"],
        )

        # Perform Analysis and Visualization
        prompt = analyze_data(data, visualization_type)

        # Chat with Groq Section
        st.subheader("Chat with Groq")
        chat_input = st.text_area("Ask Groq questions about the data:")
        if st.button("Chat") and chat_input:
            # to_string avoids the elided rows/columns of the DataFrame repr.
            table_text = data.to_string(index=False)
            chat_response = chat_with_groq(
                f"Here is the data:\n{table_text}\n\n{chat_input}"
            )
            st.write("Groq's Response:")
            st.write(chat_response)

        # Groq Code Generation Section
        st.subheader("Generate Python Code with Groq")
        prompt_input = st.text_area(
            "Describe the analysis or visualization you want to generate code for:"
        )
        if st.button("Generate Code") and prompt_input:
            prompt += f"\n\nUser request: {prompt_input}"
            response = generate_code_with_groq(prompt)

            # Display the Groq response
            st.subheader("Generated Code")
            st.code(response, language="python")
    except Exception as e:  # Top-level UI boundary: report instead of crashing.
        st.error(f"An error occurred: {e}")


if __name__ == "__main__":
    main()