ProfessorLeVesseur committed on
Commit
48f9ec0
·
verified ·
1 Parent(s): dd5ba5b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1083 -211
app.py CHANGED
@@ -1,238 +1,1110 @@
1
- #------------------------------------------------------------------------
2
- # Import Modules
3
- #------------------------------------------------------------------------
4
 
5
- import streamlit as st
6
- import openai
7
- import random
8
- import os
 
 
 
9
 
10
- from pinecone import Pinecone
11
- from langchain.chat_models import ChatOpenAI
12
- from langsmith import Client
13
- from langchain.smith import RunEvalConfig, run_on_dataset
14
 
15
- #------------------------------------------------------------------------
16
- # Load API Keys From Streamlit Secrets & Initialize the OpenAI, Pinecone, and LangSmith Clients
17
- #------------------------------------------------------------------------
 
 
18
 
19
- # Fetch the OpenAI API key from Streamlit secrets
20
- os.environ["OPENAI_API_KEY"] = st.secrets["OPENAI_API_KEY"]
21
- # Retrieve the OpenAI API Key from environment variable
22
- OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
23
- # Initialize OpenAI Service
24
- openai.api_key = OPENAI_API_KEY
25
-
26
- # Fetch Pinecone API key from Streamlit secrets
27
- os.environ["PINECONE_API_KEY"] = st.secrets["PINECONE_API_KEY"]
28
- # Retrieve the Pinecone API Key from environment variable
29
- PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
30
- # Initialize Pinecone Service
31
- # from pinecone import Pinecone
32
- pc = Pinecone(api_key=PINECONE_API_KEY)
33
-
34
- # Fetch LangSmith API key from Streamlit secrets
35
- # os.environ["LANGCHAIN_API_KEY"] = st.secrets["LANGCHAIN_API_KEY"]
36
- os.environ["LANGCHAIN_API_KEY"] = "ls__1819fb2979e44f0a9e410688d81c6390"
37
- os.environ["LANGCHAIN_TRACING_V2"] = "true"
38
- os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
39
- os.environ["LANGCHAIN_PROJECT"] = "Inkqa"
40
- # Retrieve the LangSmith API Key from environment variable
41
- LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
42
- # Initialize LangSmith Service
43
- client = Client(api_key=LANGCHAIN_API_KEY) #langsmith client
44
-
45
- # #NEW but "Restarting" never stops
46
- # # Fetch LangSmith API key from Streamlit secrets
47
- # os.environ["LANGCHAIN_API_KEY"] = st.secrets["LANGCHAIN_API_KEY"]
48
- # os.environ["LANGCHAIN_TRACING_V2"] = "true"
49
- # os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
50
- # os.environ["LANGCHAIN_PROJECT"] = "Inkqa"
51
- # # Retrieve the LangSmith API Key from environment variable
52
- # LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
53
- # # Initialize LangSmith Service
54
- # client = Client(api_key=LANGCHAIN_API_KEY) #langsmith client
55
 
56
- #------------------------------------------------------------------------
57
- # Initialize
58
- #------------------------------------------------------------------------
 
 
59
 
60
- # # Define the name of the Pinecone index
61
- index_name = 'mimtssinkqa'
62
 
63
- # Initialize the OpenAI embeddings object
64
- from langchain_openai import OpenAIEmbeddings
65
- # embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
66
- embeddings = OpenAIEmbeddings()
67
 
68
- # LOAD VECTOR STORE FROM EXISTING INDEX
69
- from langchain_community.vectorstores import Pinecone
70
- vector_store = Pinecone.from_existing_index(index_name='mimtssinkqa', embedding=embeddings)
 
 
 
71
 
72
- def ask_with_memory(vector_store, query, chat_history=[]):
73
- from langchain_openai import ChatOpenAI
74
- from langchain.chains import ConversationalRetrievalChain
75
- from langchain.memory import ConversationBufferMemory
76
 
77
- from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
 
78
 
79
- # llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.5, openai_api_key=OPENAI_API_KEY)
80
- llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.5)
81
-
82
- retriever = vector_store.as_retriever(search_type='similarity', search_kwargs={'k': 3})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
 
84
- memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
85
-
86
- system_template = r'''
87
- Article Title: 'Intensifying Literacy Instruction: Essential Practices.'
88
- Article Focus: The main focus of the article is reading and the secondary focus is writing.
89
- Expertise: Assume the role of an expert literacy coach with in-depth knowledge of the Simple View of Reading, School-Wide Positive Behavioral Interventions and Supports (SWPBIS), and Social Emotional Learning (SEL).
90
- Audience: Tailor your response for teachers and administrators seeking to enhance literacy instruction within their educational settings.
91
- Response Requirements: Provide an answer utilizing the context provided. Unless specifically requested by the user, avoid mentioning the article's header.
92
- Cover all necessary details relevant to the question posed, drawing on your expertise in literacy instruction and the Simple View of Reading.
93
- Utilize paragraphs for detailed and descriptive explanations, and bullet points for highlighting key points or steps, ensuring the information is easily understood.
94
- Conclude with a recapitulation of main points, summarizing the essential takeaways from your response.
95
- ----------------
96
- Context: ```{context}```
97
- '''
98
-
99
- user_template = '''
100
- Question: ```{question}```
101
- Chat History: ```{chat_history}```
102
- '''
103
-
104
- messages= [
105
- SystemMessagePromptTemplate.from_template(system_template),
106
- HumanMessagePromptTemplate.from_template(user_template)
107
- ]
108
-
109
- qa_prompt = ChatPromptTemplate.from_messages (messages)
110
-
111
- chain = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, memory=memory,chain_type='stuff', combine_docs_chain_kwargs={'prompt': qa_prompt}, verbose=False
112
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
 
114
- result = chain.invoke({'question': query, 'chat_history': st.session_state['history']})
115
- # Append the (question, answer) pair to chat history as a tuple
116
- st.session_state['history'].append((query, result['answer']))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
 
118
- return (result['answer'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
- # Initialize chat history
121
- if 'history' not in st.session_state:
122
- st.session_state['history'] = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
 
124
- # # STREAMLIT APPLICATION SETUP WITH PASSWORD
 
 
 
 
 
125
 
126
- # Define the correct password
127
- # correct_password = "MiBLSi"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
- #Add the image with a specified width
130
- image_width = 300 # Set the desired width in pixels
131
- st.image('MTSS.ai_Logo.png', width=image_width)
132
- st.subheader('Ink QA™ | Dynamic PDFs')
133
 
134
- # Using Markdown for formatted text
135
- st.markdown("""
136
- Resource: **Intensifying Literacy Instruction: Essential Practices**
137
- """, unsafe_allow_html=True)
 
 
 
 
 
 
 
 
 
 
138
 
 
 
 
139
  with st.sidebar:
140
  # Password input field
141
  # password = st.text_input("Enter Password:", type="password")
142
 
143
- st.image('mimtss.png', width=200)
144
- st.image('Literacy_Cover.png', width=200)
145
- st.link_button("View | Download", "https://mimtsstac.org/sites/default/files/session-documents/Intensifying%20Literacy%20Instruction%20-%20Essential%20Practices%20%28NATIONAL%29.pdf")
146
-
147
- Audio_Header_text = """
148
- **Tune into Dr. St. Martin's introduction**"""
149
- st.markdown(Audio_Header_text)
150
-
151
- # Path or URL to the audio file
152
- audio_file_path = 'Audio_Introduction_Literacy.m4a'
153
- # Display the audio player widget
154
- st.audio(audio_file_path, format='audio/mp4', start_time=0)
155
-
156
- # Citation text with Markdown formatting
157
- citation_Content_text = """
158
- **Citation**
159
- St. Martin, K., Vaughn, S., Troia, G., Fien, & H., Coyne, M. (2023). *Intensifying literacy instruction: Essential practices, Version 2.0*. Lansing, MI: MiMTSS Technical Assistance Center, Michigan Department of Education.
160
-
161
- **Table of Contents**
162
- * **Introduction**: pg. 1
163
- * **Intensifying Literacy Instruction: Essential Practices**: pg. 4
164
- * **Purpose**: pg. 4
165
- * **Practice 1**: Knowledge and Use of a Learning Progression for Developing Skilled Readers and Writers: pg. 6
166
- * **Practice 2**: Design and Use of an Intervention Platform as the Foundation for Effective Intervention: pg. 13
167
- * **Practice 3**: On-going Data-Based Decision Making for Providing and Intensifying Interventions: pg. 16
168
- * **Practice 4**: Adaptations to Increase the Instructional Intensity of the Intervention: pg. 20
169
- * **Practice 5**: Infrastructures to Support Students with Significant and Persistent Literacy Needs: pg. 24
170
- * **Motivation and Engagement**: pg. 28
171
- * **Considerations for Understanding How Students' Learning and Behavior are Enhanced**: pg. 28
172
- * **Summary**: pg. 29
173
- * **Endnotes**: pg. 30
174
- * **Acknowledgment**: pg. 39
175
- """
176
- st.markdown(citation_Content_text)
177
-
178
- # if password == correct_password:
179
- # Define a list of possible placeholder texts
180
- placeholders = [
181
- 'Example: Summarize the article in 200 words or less',
182
- 'Example: What are the essential practices?',
183
- 'Example: I am a teacher, why is this resource important?',
184
- 'Example: How can this resource support my instruction in reading and writing?',
185
- 'Example: Does this resource align with the learning progression for developing skilled readers and writers?',
186
- 'Example: How does this resource address the needs of students scoring below the 20th percentile?',
187
- 'Example: Are there assessment tools included in this resource to monitor student progress?',
188
- 'Example: Does this resource provide guidance on data collection and analysis for monitoring student outcomes?',
189
- "Example: How can this resource be used to support students' social-emotional development?",
190
- "Example: How does this resource align with the district's literacy goals and objectives?",
191
- 'Example: What research and evidence support the effectiveness of this resource?',
192
- 'Example: Does this resource provide guidance on implementation fidelity'
193
- ]
194
-
195
- # Select a random placeholder from the list
196
- if 'placeholder' not in st.session_state:
197
- st.session_state.placeholder = random.choice(placeholders)
198
-
199
-
200
- # CLEAR THE TEXT BOX
201
- with st.form("Question",clear_on_submit=True):
202
- q = st.text_input(label='Ask a Question | Send a Prompt', placeholder=st.session_state.placeholder, value='', )
203
- submitted = st.form_submit_button("Submit")
204
 
 
 
 
 
 
 
205
  st.divider()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
- if submitted:
208
- with st.spinner('Thinking...'):
209
- answer = ask_with_memory(vector_store, q, st.session_state.history)
210
-
211
- # st.write(q)
212
- st.write(f"**{q}**")
213
-
214
- import time
215
- import random
216
-
217
- def stream_answer():
218
- for word in answer.split(" "):
219
- yield word + " "
220
- # time.sleep(0.02)
221
- time.sleep(random.uniform(0.03, 0.08))
222
-
223
- st.write(stream_answer)
224
-
225
- # Display the response in a text area
226
- # st.text_area('Response: ', value=answer, height=400, key="response_text_area")
227
- # OR to display as Markdown (interprets Markdown formatting)
228
- # st.markdown(answer)
229
-
230
- st.success('Powered by MTSS GPT. AI can make mistakes. Consider checking important information.')
231
-
232
- st.divider()
233
-
234
- # # Prepare chat history text for display
235
- history_text = "\n\n".join(f"Q: {entry[0]}\nA: {entry[1]}" for entry in reversed(st.session_state.history))
236
-
237
- # Display chat history
238
- st.text_area('Chat History', value=history_text, height=800)
 
1
+ # # intervention_analysis_app.py
 
 
2
 
3
+ # import streamlit as st
4
+ # import pandas as pd
5
+ # # from transformers import pipeline
6
+ # from huggingface_hub import InferenceClient
7
+ # import os
8
+ # from pathlib import Path
9
+ # from dotenv import load_dotenv
10
 
11
+ # load_dotenv()
 
 
 
12
 
13
+ # # Set the Hugging Face API key
14
+ # # Retrieve Hugging Face API key from environment variables
15
+ # hf_api_key = os.getenv('HF_API_KEY')
16
+ # if not hf_api_key:
17
+ # raise ValueError("HF_API_KEY not set in environment variables")
18
 
19
+ # # Create the Hugging Face inference client
20
+ # client = InferenceClient(api_key=hf_api_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
+ # # Constants
23
+ # INTERVENTION_COLUMN = 'Did the intervention happen today?'
24
+ # ENGAGED_STR = 'Engaged (Respect, Responsibility, Effort)'
25
+ # PARTIALLY_ENGAGED_STR = 'Partially Engaged (about 50%)'
26
+ # NOT_ENGAGED_STR = 'Not Engaged (less than 50%)'
27
 
28
+ # def main():
29
+ # st.title("Intervention Program Analysis")
30
 
31
+ # # File uploader
32
+ # uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
 
 
33
 
34
+ # if uploaded_file is not None:
35
+ # try:
36
+ # # Read the Excel file into a DataFrame
37
+ # df = pd.read_excel(uploaded_file)
38
+ # st.subheader("Uploaded Data")
39
+ # st.write(df)
40
 
41
+ # # Ensure expected column is available
42
+ # if INTERVENTION_COLUMN not in df.columns:
43
+ # st.error(f"Expected column '{INTERVENTION_COLUMN}' not found.")
44
+ # return
45
 
46
+ # # Clean up column names
47
+ # df.columns = df.columns.str.strip()
48
 
49
+ # # Compute Intervention Session Statistics
50
+ # intervention_stats = compute_intervention_statistics(df)
51
+ # st.subheader("Intervention Session Statistics")
52
+ # st.write(intervention_stats)
53
+
54
+ # # Compute Student Metrics
55
+ # student_metrics_df = compute_student_metrics(df)
56
+ # st.subheader("Student Metrics")
57
+ # st.write(student_metrics_df)
58
+
59
+ # # Prepare input for the language model
60
+ # llm_input = prepare_llm_input(student_metrics_df)
61
+
62
+ # # Generate Notes and Recommendations using Hugging Face LLM
63
+ # recommendations = prompt_response_from_hf_llm(llm_input)
64
+
65
+ # st.subheader("AI Analysis")
66
+ # st.markdown(recommendations)
67
+
68
+ # except Exception as e:
69
+ # st.error(f"Error reading the file: {str(e)}")
70
+
71
+ # def compute_intervention_statistics(df):
72
+ # # Total Number of Days Available
73
+ # total_days = len(df)
74
+
75
+ # # Intervention Sessions Held
76
+ # sessions_held = df[INTERVENTION_COLUMN].str.strip().str.lower().eq('yes').sum()
77
+
78
+ # # Intervention Sessions Not Held
79
+ # sessions_not_held = df[INTERVENTION_COLUMN].str.strip().str.lower().eq('no').sum()
80
+
81
+ # # Intervention Frequency (%)
82
+ # intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
83
+ # intervention_frequency = round(intervention_frequency, 2)
84
+
85
+ # # Create a DataFrame to display the statistics
86
+ # stats = {
87
+ # 'Total Number of Days Available': [total_days],
88
+ # 'Intervention Sessions Held': [sessions_held],
89
+ # 'Intervention Sessions Not Held': [sessions_not_held],
90
+ # 'Intervention Frequency (%)': [intervention_frequency]
91
+ # }
92
+ # stats_df = pd.DataFrame(stats)
93
+ # return stats_df
94
+
95
+ # def compute_student_metrics(df):
96
+ # # Filter DataFrame for sessions where intervention happened
97
+ # intervention_df = df[df[INTERVENTION_COLUMN].str.strip().str.lower() == 'yes']
98
+ # intervention_sessions_held = len(intervention_df)
99
+
100
+ # # Get list of student columns
101
+ # student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
102
+
103
+ # student_metrics = {}
104
+
105
+ # for col in student_columns:
106
+ # student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
107
+ # # Get the attendance data for the student
108
+ # student_data = intervention_df[[col]].copy()
109
+
110
+ # # Treat blank entries as 'Absent'
111
+ # student_data[col] = student_data[col].fillna('Absent')
112
+
113
+ # # Assign attendance values
114
+ # attendance_values = student_data[col].apply(lambda x: 1 if x in [
115
+ # ENGAGED_STR,
116
+ # PARTIALLY_ENGAGED_STR,
117
+ # NOT_ENGAGED_STR
118
+ # ] else 0)
119
+
120
+ # # Number of Sessions Attended
121
+ # sessions_attended = attendance_values.sum()
122
+
123
+ # # Attendance (%)
124
+ # attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
125
+ # attendance_pct = round(attendance_pct, 2)
126
+
127
+ # # For engagement calculation, include only sessions where attendance is not 'Absent'
128
+ # valid_engagement_indices = attendance_values[attendance_values == 1].index
129
+ # engagement_data = student_data.loc[valid_engagement_indices, col]
130
+
131
+ # # Assign engagement values
132
+ # engagement_values = engagement_data.apply(lambda x: 1 if x == ENGAGED_STR
133
+ # else 0.5 if x == PARTIALLY_ENGAGED_STR else 0)
134
+
135
+ # # Sum of Engagement Values
136
+ # sum_engagement_values = engagement_values.sum()
137
+
138
+ # # Number of Sessions Attended for engagement (should be same as sessions_attended)
139
+ # number_sessions_attended = len(valid_engagement_indices)
140
+
141
+ # # Engagement (%)
142
+ # engagement_pct = (sum_engagement_values / number_sessions_attended) * 100 if number_sessions_attended > 0 else 0
143
+ # engagement_pct = round(engagement_pct, 2)
144
+
145
+ # # Store metrics
146
+ # student_metrics[student_name] = {
147
+ # 'Attendance (%)': attendance_pct,
148
+ # 'Engagement (%)': engagement_pct
149
+ # }
150
+
151
+ # # Create a DataFrame from student_metrics
152
+ # student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
153
+ # student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
154
+ # return student_metrics_df
155
+
156
+ # def prepare_llm_input(student_metrics_df):
157
+ # # Convert the student metrics DataFrame to a string
158
+ # metrics_str = student_metrics_df.to_string(index=False)
159
+ # llm_input = f"""
160
+ # Based on the following student metrics:
161
+
162
+ # {metrics_str}
163
+
164
+ # Provide:
165
+
166
+ # 1. Notes and Key Takeaways: Summarize the data, highlight students with the lowest and highest attendance and engagement percentages, identify students who may need adjustments to their intervention due to low attendance or engagement, and highlight students who are showing strong performance.
167
+
168
+ # 2. Recommendations and Next Steps: Provide interpretations based on the analysis and suggest possible next steps or strategies to improve student outcomes.
169
+ # """
170
+ # return llm_input
171
+
172
+ # def prompt_response_from_hf_llm(llm_input):
173
+ # # Request the AI analysis from the Hugging Face chat completions API
174
+ # response = client.chat.completions.create(
175
+ # # model="mistralai/Mistral-7B-Instruct-v0.3",
176
+ # model="meta-llama/Llama-3.1-70B-Instruct",
177
+ # messages=[
178
+ # {"role": "user", "content": llm_input}
179
+ # ],
180
+ # stream=True,
181
+ # temperature=0.5,
182
+ # max_tokens=1024,
183
+ # top_p=0.7
184
+ # )
185
 
186
+ # # Combine messages if response is streamed
187
+ # response_content = ""
188
+ # for message in response:
189
+ # response_content += message.choices[0].delta.content
190
+
191
+ # return response_content.strip()
192
+
193
+ # if __name__ == '__main__':
194
+ # main()
195
+
196
+
197
+ # CHARTS
198
+ # # intervention_analysis_app.py
199
+
200
+ # import streamlit as st
201
+ # import pandas as pd
202
+ # import matplotlib.pyplot as plt
203
+ # # from transformers import pipeline
204
+ # from huggingface_hub import InferenceClient
205
+ # import os
206
+ # from pathlib import Path
207
+ # from dotenv import load_dotenv
208
+
209
+ # load_dotenv()
210
+
211
+ # # Set the Hugging Face API key
212
+ # # Retrieve Hugging Face API key from environment variables
213
+ # hf_api_key = os.getenv('HF_API_KEY')
214
+ # if not hf_api_key:
215
+ # raise ValueError("HF_API_KEY not set in environment variables")
216
+
217
+ # # Create the Hugging Face inference client
218
+ # client = InferenceClient(api_key=hf_api_key)
219
+
220
+ # # Constants
221
+ # INTERVENTION_COLUMN = 'Did the intervention happen today?'
222
+ # ENGAGED_STR = 'Engaged (Respect, Responsibility, Effort)'
223
+ # PARTIALLY_ENGAGED_STR = 'Partially Engaged (about 50%)'
224
+ # NOT_ENGAGED_STR = 'Not Engaged (less than 50%)'
225
+
226
+ # def main():
227
+ # st.title("Intervention Program Analysis")
228
+
229
+ # # File uploader
230
+ # uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
231
+
232
+ # if uploaded_file is not None:
233
+ # try:
234
+ # # Read the Excel file into a DataFrame
235
+ # df = pd.read_excel(uploaded_file)
236
+ # st.subheader("Uploaded Data")
237
+ # # st.write(df.head(4)) # Display only the first four rows
238
+ # st.write(df) # Display all
239
+
240
+ # # Ensure expected column is available
241
+ # if INTERVENTION_COLUMN not in df.columns:
242
+ # st.error(f"Expected column '{INTERVENTION_COLUMN}' not found.")
243
+ # return
244
+
245
+ # # Clean up column names
246
+ # df.columns = df.columns.str.strip()
247
+
248
+ # # Compute Intervention Session Statistics
249
+ # intervention_stats = compute_intervention_statistics(df)
250
+ # st.subheader("Intervention Session Statistics")
251
+ # st.write(intervention_stats)
252
+
253
+ # # Visualization for Intervention Session Statistics
254
+ # plot_intervention_statistics(intervention_stats)
255
+
256
+ # # Compute Student Metrics
257
+ # student_metrics_df = compute_student_metrics(df)
258
+ # st.subheader("Student Metrics")
259
+ # st.write(student_metrics_df)
260
+
261
+ # # Visualization for Student Metrics
262
+ # plot_student_metrics(student_metrics_df)
263
+
264
+ # # Prepare input for the language model
265
+ # llm_input = prepare_llm_input(student_metrics_df)
266
+
267
+ # # Generate Notes and Recommendations using Hugging Face LLM
268
+ # recommendations = prompt_response_from_hf_llm(llm_input)
269
+
270
+ # st.subheader("AI Analysis")
271
+ # st.markdown(recommendations)
272
+
273
+ # except Exception as e:
274
+ # st.error(f"Error reading the file: {str(e)}")
275
+
276
+ # def compute_intervention_statistics(df):
277
+ # # Total Number of Days Available
278
+ # total_days = len(df)
279
 
280
+ # # Intervention Sessions Held
281
+ # sessions_held = df[INTERVENTION_COLUMN].str.strip().str.lower().eq('yes').sum()
282
+
283
+ # # Intervention Sessions Not Held
284
+ # sessions_not_held = df[INTERVENTION_COLUMN].str.strip().str.lower().eq('no').sum()
285
+
286
+ # # Intervention Frequency (%)
287
+ # intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
288
+ # intervention_frequency = round(intervention_frequency, 2)
289
+
290
+ # # Create a DataFrame to display the statistics
291
+ # stats = {
292
+ # 'Total Number of Days Available': [total_days],
293
+ # 'Intervention Sessions Held': [sessions_held],
294
+ # 'Intervention Sessions Not Held': [sessions_not_held],
295
+ # 'Intervention Frequency (%)': [intervention_frequency]
296
+ # }
297
+ # stats_df = pd.DataFrame(stats)
298
+ # return stats_df
299
+
300
+ # def plot_intervention_statistics(intervention_stats):
301
+ # # Create a stacked bar chart for sessions held and not held
302
+ # sessions_held = intervention_stats['Intervention Sessions Held'].values[0]
303
+ # sessions_not_held = intervention_stats['Intervention Sessions Not Held'].values[0]
304
+
305
+ # fig, ax = plt.subplots()
306
+ # ax.bar(['Intervention Sessions'], [sessions_not_held], label='Not Held', color='#358E66')
307
+ # ax.bar(['Intervention Sessions'], [sessions_held], bottom=[sessions_not_held], label='Held', color='#91D6B8')
308
+
309
+ # # Display the values on the bars
310
+ # ax.text(0, sessions_not_held / 2, str(sessions_not_held), ha='center', va='center', color='white')
311
+ # ax.text(0, sessions_not_held + sessions_held / 2, str(sessions_held), ha='center', va='center', color='black')
312
+
313
+ # ax.set_ylabel('Number of Sessions')
314
+ # ax.set_title('Intervention Sessions Held vs Not Held')
315
+ # ax.legend()
316
+
317
+ # st.pyplot(fig)
318
+
319
+ # def compute_student_metrics(df):
320
+ # # Filter DataFrame for sessions where intervention happened
321
+ # intervention_df = df[df[INTERVENTION_COLUMN].str.strip().str.lower() == 'yes']
322
+ # intervention_sessions_held = len(intervention_df)
323
+
324
+ # # Get list of student columns
325
+ # student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
326
+
327
+ # student_metrics = {}
328
+
329
+ # for col in student_columns:
330
+ # student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
331
+ # # Get the attendance data for the student
332
+ # student_data = intervention_df[[col]].copy()
333
+
334
+ # # Treat blank entries as 'Absent'
335
+ # student_data[col] = student_data[col].fillna('Absent')
336
+
337
+ # # Assign attendance values
338
+ # attendance_values = student_data[col].apply(lambda x: 1 if x in [
339
+ # ENGAGED_STR,
340
+ # PARTIALLY_ENGAGED_STR,
341
+ # NOT_ENGAGED_STR
342
+ # ] else 0)
343
+
344
+ # # Number of Sessions Attended
345
+ # sessions_attended = attendance_values.sum()
346
+
347
+ # # Attendance (%)
348
+ # attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
349
+ # attendance_pct = round(attendance_pct, 2)
350
+
351
+ # # For engagement calculation, include only sessions where attendance is not 'Absent'
352
+ # valid_engagement_indices = attendance_values[attendance_values == 1].index
353
+ # engagement_data = student_data.loc[valid_engagement_indices, col]
354
+
355
+ # # Assign engagement values
356
+ # engagement_values = engagement_data.apply(lambda x: 1 if x == ENGAGED_STR
357
+ # else 0.5 if x == PARTIALLY_ENGAGED_STR else 0)
358
+
359
+ # # Sum of Engagement Values
360
+ # sum_engagement_values = engagement_values.sum()
361
+
362
+ # # Number of Sessions Attended for engagement (should be same as sessions_attended)
363
+ # number_sessions_attended = len(valid_engagement_indices)
364
+
365
+ # # Engagement (%)
366
+ # engagement_pct = (sum_engagement_values / number_sessions_attended) * 100 if number_sessions_attended > 0 else 0
367
+ # engagement_pct = round(engagement_pct, 2)
368
+
369
+ # # Store metrics
370
+ # student_metrics[student_name] = {
371
+ # 'Attendance (%)': attendance_pct,
372
+ # 'Engagement (%)': engagement_pct
373
+ # }
374
+
375
+ # # Create a DataFrame from student_metrics
376
+ # student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
377
+ # student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
378
+ # return student_metrics_df
379
+
380
+ # def plot_student_metrics(student_metrics_df):
381
+ # # Create a line graph for attendance and engagement
382
+ # fig, ax = plt.subplots()
383
+
384
+ # # Plotting Attendance and Engagement with specific colors
385
+ # ax.plot(student_metrics_df['Student'], student_metrics_df['Attendance (%)'], marker='o', color='#005288', label='Attendance (%)')
386
+ # ax.plot(student_metrics_df['Student'], student_metrics_df['Engagement (%)'], marker='o', color='#3AB0FF', label='Engagement (%)')
387
+
388
+ # ax.set_xlabel('Student')
389
+ # ax.set_ylabel('Percentage (%)')
390
+ # ax.set_title('Student Attendance and Engagement Metrics')
391
+ # ax.legend()
392
+ # plt.xticks(rotation=45)
393
+
394
+ # st.pyplot(fig)
395
+
396
+ # def prepare_llm_input(student_metrics_df):
397
+ # # Convert the student metrics DataFrame to a string
398
+ # metrics_str = student_metrics_df.to_string(index=False)
399
+ # llm_input = f"""
400
+ # Based on the following student metrics:
401
+
402
+ # {metrics_str}
403
+
404
+ # Provide:
405
+
406
+ # 1. Notes and Key Takeaways: Summarize the data, highlight students with the lowest and highest attendance and engagement percentages, identify students who may need adjustments to their intervention due to low attendance or engagement, and highlight students who are showing strong performance.
407
+
408
+ # 2. Recommendations and Next Steps: Provide interpretations based on the analysis and suggest possible next steps or strategies to improve student outcomes.
409
+ # """
410
+ # return llm_input
411
+
412
+ # def prompt_response_from_hf_llm(llm_input):
413
+ # # Request the AI analysis from the Hugging Face chat completions API
414
+ # response = client.chat.completions.create(
415
+ # # model="mistralai/Mistral-7B-Instruct-v0.3",
416
+ # model="meta-llama/Llama-3.1-70B-Instruct",
417
+ # messages=[
418
+ # {"role": "user", "content": llm_input}
419
+ # ],
420
+ # stream=True,
421
+ # temperature=0.5,
422
+ # max_tokens=1024,
423
+ # top_p=0.7
424
+ # )
425
 
426
+ # # Combine messages if response is streamed
427
+ # response_content = ""
428
+ # for message in response:
429
+ # response_content += message.choices[0].delta.content
430
+
431
+ # return response_content.strip()
432
+
433
+ # if __name__ == '__main__':
434
+ # main()
435
+
436
+
437
+
438
+
439
+
440
+ # CHARTS + DOWNLOAD
441
+ # # intervention_analysis_app.py
442
+
443
+ # import streamlit as st
444
+ # import pandas as pd
445
+ # import matplotlib.pyplot as plt
446
+ # import io
447
+ # # from transformers import pipeline
448
+ # from huggingface_hub import InferenceClient
449
+ # import os
450
+ # from pathlib import Path
451
+ # from dotenv import load_dotenv
452
+
453
+ # load_dotenv()
454
+
455
+ # # Set the Hugging Face API key
456
+ # # Retrieve Hugging Face API key from environment variables
457
+ # hf_api_key = os.getenv('HF_API_KEY')
458
+ # if not hf_api_key:
459
+ # raise ValueError("HF_API_KEY not set in environment variables")
460
+
461
+ # # Create the Hugging Face inference client
462
+ # client = InferenceClient(api_key=hf_api_key)
463
+
464
+ # # Constants
465
+ # INTERVENTION_COLUMN = 'Did the intervention happen today?'
466
+ # ENGAGED_STR = 'Engaged (Respect, Responsibility, Effort)'
467
+ # PARTIALLY_ENGAGED_STR = 'Partially Engaged (about 50%)'
468
+ # NOT_ENGAGED_STR = 'Not Engaged (less than 50%)'
469
+
470
+ # def main():
471
+ # st.title("Intervention Program Analysis")
472
+
473
+ # # File uploader
474
+ # uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
475
+
476
+ # if uploaded_file is not None:
477
+ # try:
478
+ # # Read the Excel file into a DataFrame
479
+ # df = pd.read_excel(uploaded_file)
480
+ # st.subheader("Uploaded Data")
481
+ # st.write(df.head(4)) # Display only the first four rows
482
+
483
+ # # Ensure expected column is available
484
+ # if INTERVENTION_COLUMN not in df.columns:
485
+ # st.error(f"Expected column '{INTERVENTION_COLUMN}' not found.")
486
+ # return
487
+
488
+ # # Clean up column names
489
+ # df.columns = df.columns.str.strip()
490
+
491
+ # # Compute Intervention Session Statistics
492
+ # intervention_stats = compute_intervention_statistics(df)
493
+ # st.subheader("Intervention Session Statistics")
494
+ # st.write(intervention_stats)
495
+
496
+ # # Visualization for Intervention Session Statistics
497
+ # intervention_fig = plot_intervention_statistics(intervention_stats)
498
+
499
+ # # Add download button for Intervention Session Statistics chart
500
+ # download_chart(intervention_fig, "intervention_statistics_chart.png")
501
+
502
+ # # Compute Student Metrics
503
+ # student_metrics_df = compute_student_metrics(df)
504
+ # st.subheader("Student Metrics")
505
+ # st.write(student_metrics_df)
506
+
507
+ # # Visualization for Student Metrics
508
+ # student_metrics_fig = plot_student_metrics(student_metrics_df)
509
+
510
+ # # Add download button for Student Metrics chart
511
+ # download_chart(student_metrics_fig, "student_metrics_chart.png")
512
+
513
+ # # Prepare input for the language model
514
+ # llm_input = prepare_llm_input(student_metrics_df)
515
+
516
+ # # Generate Notes and Recommendations using Hugging Face LLM
517
+ # with st.spinner("Generating AI analysis..."):
518
+ # recommendations = prompt_response_from_hf_llm(llm_input)
519
+
520
+ # st.subheader("AI Analysis")
521
+ # st.markdown(recommendations)
522
+
523
+ # # Add download button for LLM output
524
+ # download_llm_output(recommendations, "llm_output.txt")
525
+
526
+ # except Exception as e:
527
+ # st.error(f"Error reading the file: {str(e)}")
528
+
529
+ # def compute_intervention_statistics(df):
530
+ # # Total Number of Days Available
531
+ # total_days = len(df)
532
+
533
+ # # Intervention Sessions Held
534
+ # sessions_held = df[INTERVENTION_COLUMN].str.strip().str.lower().eq('yes').sum()
535
+
536
+ # # Intervention Sessions Not Held
537
+ # sessions_not_held = df[INTERVENTION_COLUMN].str.strip().str.lower().eq('no').sum()
538
 
539
+ # # Intervention Frequency (%)
540
+ # intervention_frequency = (sessions_held / total_days) * 100 if total_days > 0 else 0
541
+ # intervention_frequency = round(intervention_frequency, 2)
542
+
543
+ # # Create a DataFrame to display the statistics
544
+ # stats = {
545
+ # 'Total Number of Days Available': [total_days],
546
+ # 'Intervention Sessions Held': [sessions_held],
547
+ # 'Intervention Sessions Not Held': [sessions_not_held],
548
+ # 'Intervention Frequency (%)': [intervention_frequency]
549
+ # }
550
+ # stats_df = pd.DataFrame(stats)
551
+ # return stats_df
552
+
553
+ # def plot_intervention_statistics(intervention_stats):
554
+ # # Create a stacked bar chart for sessions held and not held
555
+ # sessions_held = intervention_stats['Intervention Sessions Held'].values[0]
556
+ # sessions_not_held = intervention_stats['Intervention Sessions Not Held'].values[0]
557
+
558
+ # fig, ax = plt.subplots()
559
+ # ax.bar(['Intervention Sessions'], [sessions_not_held], label='Not Held', color='#358E66')
560
+ # ax.bar(['Intervention Sessions'], [sessions_held], bottom=[sessions_not_held], label='Held', color='#91D6B8')
561
+
562
+ # # Display the values on the bars
563
+ # ax.text(0, sessions_not_held / 2, str(sessions_not_held), ha='center', va='center', color='white')
564
+ # ax.text(0, sessions_not_held + sessions_held / 2, str(sessions_held), ha='center', va='center', color='black')
565
+
566
+ # ax.set_ylabel('Number of Sessions')
567
+ # ax.set_title('Intervention Sessions Held vs Not Held')
568
+ # ax.legend()
569
+
570
+ # st.pyplot(fig)
571
+
572
+ # return fig
573
+
574
+ # def compute_student_metrics(df):
575
+ # # Filter DataFrame for sessions where intervention happened
576
+ # intervention_df = df[df[INTERVENTION_COLUMN].str.strip().str.lower() == 'yes']
577
+ # intervention_sessions_held = len(intervention_df)
578
+
579
+ # # Get list of student columns
580
+ # student_columns = [col for col in df.columns if col.startswith('Student Attendance')]
581
+
582
+ # student_metrics = {}
583
+
584
+ # for col in student_columns:
585
+ # student_name = col.replace('Student Attendance [', '').replace(']', '').strip()
586
+ # # Get the attendance data for the student
587
+ # student_data = intervention_df[[col]].copy()
588
+
589
+ # # Treat blank entries as 'Absent'
590
+ # student_data[col] = student_data[col].fillna('Absent')
591
+
592
+ # # Assign attendance values
593
+ # attendance_values = student_data[col].apply(lambda x: 1 if x in [
594
+ # ENGAGED_STR,
595
+ # PARTIALLY_ENGAGED_STR,
596
+ # NOT_ENGAGED_STR
597
+ # ] else 0)
598
+
599
+ # # Number of Sessions Attended
600
+ # sessions_attended = attendance_values.sum()
601
+
602
+ # # Attendance (%)
603
+ # attendance_pct = (sessions_attended / intervention_sessions_held) * 100 if intervention_sessions_held > 0 else 0
604
+ # attendance_pct = round(attendance_pct, 2)
605
+
606
+ # # For engagement calculation, include only sessions where attendance is not 'Absent'
607
+ # valid_engagement_indices = attendance_values[attendance_values == 1].index
608
+ # engagement_data = student_data.loc[valid_engagement_indices, col]
609
+
610
+ # # Assign engagement values
611
+ # engagement_values = engagement_data.apply(lambda x: 1 if x == ENGAGED_STR
612
+ # else 0.5 if x == PARTIALLY_ENGAGED_STR else 0)
613
+
614
+ # # Sum of Engagement Values
615
+ # sum_engagement_values = engagement_values.sum()
616
+
617
+ # # Number of Sessions Attended for engagement (should be same as sessions_attended)
618
+ # number_sessions_attended = len(valid_engagement_indices)
619
+
620
+ # # Engagement (%)
621
+ # engagement_pct = (sum_engagement_values / number_sessions_attended) * 100 if number_sessions_attended > 0 else 0
622
+ # engagement_pct = round(engagement_pct, 2)
623
+
624
+ # # Store metrics
625
+ # student_metrics[student_name] = {
626
+ # 'Attendance (%)': attendance_pct,
627
+ # 'Engagement (%)': engagement_pct
628
+ # }
629
+
630
+ # # Create a DataFrame from student_metrics
631
+ # student_metrics_df = pd.DataFrame.from_dict(student_metrics, orient='index').reset_index()
632
+ # student_metrics_df.rename(columns={'index': 'Student'}, inplace=True)
633
+ # return student_metrics_df
634
+
635
+ # def plot_student_metrics(student_metrics_df):
636
+ # # Create a line graph for attendance and engagement
637
+ # fig, ax = plt.subplots()
638
+
639
+ # # Plotting Attendance and Engagement with specific colors
640
+ # ax.plot(student_metrics_df['Student'], student_metrics_df['Attendance (%)'], marker='o', color='#005288', label='Attendance (%)')
641
+ # ax.plot(student_metrics_df['Student'], student_metrics_df['Engagement (%)'], marker='o', color='#3AB0FF', label='Engagement (%)')
642
+
643
+ # ax.set_xlabel('Student')
644
+ # ax.set_ylabel('Percentage (%)')
645
+ # ax.set_title('Student Attendance and Engagement Metrics')
646
+ # ax.legend()
647
+ # plt.xticks(rotation=45)
648
+
649
+ # st.pyplot(fig)
650
+
651
+ # return fig
652
+
653
+ # def download_chart(fig, filename):
654
+ # # Create a buffer to hold the image data
655
+ # buffer = io.BytesIO()
656
+ # # Save the figure to the buffer
657
+ # fig.savefig(buffer, format='png')
658
+ # # Set the file pointer to the beginning
659
+ # buffer.seek(0)
660
+ # # Add a download button to Streamlit
661
+ # st.download_button(label="Download Chart", data=buffer, file_name=filename, mime='image/png')
662
+
663
+ # def download_llm_output(content, filename):
664
+ # # Create a buffer to hold the text data
665
+ # buffer = io.BytesIO()
666
+ # buffer.write(content.encode('utf-8'))
667
+ # buffer.seek(0)
668
+ # # Add a download button to Streamlit
669
+ # st.download_button(label="Download LLM Output", data=buffer, file_name=filename, mime='text/plain')
670
+
671
+ # def prepare_llm_input(student_metrics_df):
672
+ # # Convert the student metrics DataFrame to a string
673
+ # metrics_str = student_metrics_df.to_string(index=False)
674
+ # llm_input = f"""
675
+ # Based on the following student metrics:
676
+
677
+ # {metrics_str}
678
+
679
+ # Provide:
680
+
681
+ # 1. Notes and Key Takeaways: Summarize the data, highlight students with the lowest and highest attendance and engagement percentages, identify students who may need adjustments to their intervention due to low attendance or engagement, and highlight students who are showing strong performance.
682
+
683
+ # 2. Recommendations and Next Steps: Provide interpretations based on the analysis and suggest possible next steps or strategies to improve student outcomes.
684
+ # """
685
+ # return llm_input
686
+
687
+ # def prompt_response_from_hf_llm(llm_input):
688
+ # # Generate the refined prompt using Hugging Face API
689
+ # response = client.chat.completions.create(
690
+ # model="meta-llama/Llama-3.1-70B-Instruct",
691
+ # messages=[
692
+ # {"role": "user", "content": llm_input}
693
+ # ],
694
+ # stream=True,
695
+ # temperature=0.5,
696
+ # max_tokens=1024,
697
+ # top_p=0.7
698
+ # )
699
 
700
+ # # Combine messages if response is streamed
701
+ # response_content = ""
702
+ # for message in response:
703
+ # response_content += message.choices[0].delta.content
704
+
705
+ # return response_content.strip()
706
 
707
+ # if __name__ == '__main__':
708
+ # main()
709
+
710
+
711
+ # CHARTS + DOWNLOAD + NO NAMES
712
+ # intervention_analysis_app.py
713
+
714
+ #------------------------------------------------------------------------
715
+ # Import Modules
716
+ #------------------------------------------------------------------------
717
+ import streamlit as st
718
+ import pandas as pd
719
+ import matplotlib.pyplot as plt
720
+ import io
721
+ import re
722
+ # from transformers import pipeline
723
+ from huggingface_hub import InferenceClient
724
+ import os
725
+ from pathlib import Path
726
+ from dotenv import load_dotenv
727
 
728
+ load_dotenv()
 
 
 
729
 
730
+ #------------------------------------------------------------------------
731
+ # Configurations
732
+ #------------------------------------------------------------------------
733
+ # Streamlit page setup
734
# Entries for the page menu, kept in a named dict so the call below stays short.
_page_menu_items = {
    'Get Help': 'mailto:[email protected]',
    'About': "This app is built to support spreadsheet analysis",
}

# Page-level settings: title, icon, layout, sidebar state and menu entries.
st.set_page_config(
    page_title="Intervention Program Analysis",
    page_icon=":bar_chart:",
    layout="centered",
    initial_sidebar_state="auto",
    menu_items=_page_menu_items,
)
744
 
745
+ #------------------------------------------------------------------------
746
+ # Sidebar
747
+ #------------------------------------------------------------------------
748
with st.sidebar:
    # Password input field
    # password = st.text_input("Enter Password:", type="password")

    # Set the desired width in pixels
    image_width = 300
    # Look for the logo next to this script instead of at an absolute path on
    # one developer's machine, so the app also runs when deployed elsewhere.
    image_path = Path(__file__).resolve().parent / "mimtss_logo.png"
    # Display the image; a missing logo must not prevent the app from loading.
    if image_path.is_file():
        st.image(str(image_path), width=image_width)

    # Toggle for Help and Report a Bug
    with st.expander("Need help and report a bug"):
        st.write("""
**Contact**: Cheyne LeVesseur, PhD
**Email**: [email protected]
""")
    st.divider()
    st.subheader('User Instructions')

    # User instructions with Markdown formatting
    User_Instructions = """

- **Step 1**: Upload your Excel file.
- **Step 2**: Anonymization – student names are replaced with initials for privacy.
- **Step 3**: Review anonymized data.
- **Step 4**: View **intervention session statistics**.
- **Step 5**: Review **student attendance and engagement metrics**.
- **Step 6**: Review AI-generated **insights and recommendations**.

### **Privacy Assurance**
- **No full names** are ever displayed or sent to the AI model—only initials are used.
- This ensures that sensitive data remains protected throughout the entire process.

### **Detailed Instructions**

#### **1. Upload Your Excel File**
- Start by uploading an Excel file that contains intervention data.
- Click on the **“Upload your Excel file”** button and select your `.xlsx` file from your computer.

**Note**: Your file should have columns like "Did the intervention happen today?" and "Student Attendance [FirstName LastName]" for the analysis to work correctly.

#### **2. Automated Name Anonymization**
- Once the file is uploaded, the app will **automatically replace student names with initials** in the "Student Attendance" columns.
- For example, **"Student Attendance [Cheyne LeVesseur]"** will be displayed as **"Student Attendance [CL]"**.
- If the student only has a first name, like **"Student Attendance [Cheyne]"**, it will be displayed as **"Student Attendance [C]"**.
- This anonymization helps to **protect student privacy**, ensuring that full names are not visible or sent to the AI language model.

#### **3. Review the Uploaded Data**
- You will see the entire table of anonymized data to verify that the information has been uploaded correctly and that names have been replaced with initials.

#### **4. Intervention Session Statistics**
- The app will calculate and display statistics related to intervention sessions, such as:
- **Total Number of Days Available**
- **Intervention Sessions Held**
- **Intervention Sessions Not Held**
- **Intervention Frequency (%)**
- A **stacked bar chart** will be shown to visualize the number of sessions held versus not held.
- If you need to save the visualization, click the **“Download Chart”** button to download it as a `.png` file.

#### **5. Student Metrics Analysis**
- The app will also calculate metrics for each student:
- **Attendance (%)** – The percentage of intervention sessions attended.
- **Engagement (%)** – The level of engagement during attended sessions.
- These metrics will be presented in a **line graph** that shows attendance and engagement for each student.
- You can click the **“Download Chart”** button to download the visualization as a `.png` file.

#### **6. Generate AI Analysis and Recommendations**
- The app will prepare data from the student metrics to provide notes, key takeaways, and suggestions for improving outcomes using an **AI language model**.
- You will see a **spinner** labeled **“Generating AI analysis…”** while the AI processes the data.
- This step may take a little longer, but the spinner ensures you know that the system is working.
- Once the analysis is complete, the AI's recommendations will be displayed under **"AI Analysis"**.
- You can click the **“Download LLM Output”** button to download the AI-generated recommendations as a `.txt` file for future reference.

"""
    st.markdown(User_Instructions)
824
+
825
+ #------------------------------------------------------------------------
826
+ # Functions
827
+ #------------------------------------------------------------------------
828
# Hugging Face credentials come from the HF_API_KEY environment variable;
# a missing or empty value stops the app immediately with a clear error.
hf_api_key = os.environ.get('HF_API_KEY')
if hf_api_key is None or hf_api_key == "":
    raise ValueError("HF_API_KEY not set in environment variables")

# Shared inference client used by prompt_response_from_hf_llm().
client = InferenceClient(api_key=hf_api_key)

# Spreadsheet vocabulary: the yes/no column that marks whether a session ran,
# and the three cell values that count as "attended" in a student column.
INTERVENTION_COLUMN = 'Did the intervention happen today?'
ENGAGED_STR = 'Engaged (Respect, Responsibility, Effort)'
PARTIALLY_ENGAGED_STR = 'Partially Engaged (about 50%)'
NOT_ENGAGED_STR = 'Not Engaged (less than 50%)'
842
+
843
def main():
    """Render the page: upload, anonymize, compute statistics, chart, and AI notes.

    Nothing is rendered beyond the title and uploader until a file is
    provided. Any failure while reading or processing the workbook is shown
    to the user via ``st.error`` instead of raising.
    """
    st.title("Intervention Program Analysis")

    # File uploader
    uploaded_file = st.file_uploader("Upload your Excel file", type=["xlsx"])
    if uploaded_file is None:
        return

    try:
        # Read the Excel file into a DataFrame
        df = pd.read_excel(uploaded_file)

        # Trim header whitespace BEFORE anything inspects the headers, so the
        # anonymization pattern and the required-column check both see clean
        # names. Non-string headers (numbers, dates) are left as they are.
        df.columns = [
            col.strip() if isinstance(col, str) else col for col in df.columns
        ]

        # Replace student names with initials
        df = replace_student_names_with_initials(df)

        st.subheader("Uploaded Data")
        st.write(df.head(4))  # Display only the first four rows

        # Ensure expected column is available
        if INTERVENTION_COLUMN not in df.columns:
            st.error(f"Expected column '{INTERVENTION_COLUMN}' not found.")
            return

        # Intervention session statistics, chart, and chart download
        intervention_stats = compute_intervention_statistics(df)
        st.subheader("Intervention Session Statistics")
        st.write(intervention_stats)
        intervention_fig = plot_intervention_statistics(intervention_stats)
        download_chart(intervention_fig, "intervention_statistics_chart.png")
        # The figure has been displayed and exported; release it so repeated
        # script runs do not accumulate open matplotlib figures.
        plt.close(intervention_fig)

        # Per-student metrics, chart, and chart download
        student_metrics_df = compute_student_metrics(df)
        st.subheader("Student Metrics")
        st.write(student_metrics_df)
        student_metrics_fig = plot_student_metrics(student_metrics_df)
        download_chart(student_metrics_fig, "student_metrics_chart.png")
        plt.close(student_metrics_fig)

        # Prepare input for the language model
        llm_input = prepare_llm_input(student_metrics_df)

        # Generate Notes and Recommendations using Hugging Face LLM
        with st.spinner("Generating AI analysis..."):
            recommendations = prompt_response_from_hf_llm(llm_input)

        st.subheader("AI Analysis")
        st.markdown(recommendations)

        # Add download button for LLM output
        download_llm_output(recommendations, "llm_output.txt")

    except Exception as e:
        # Top-level boundary of the app: this guards reading, analysis,
        # plotting and the LLM call, so the message says "processing".
        st.error(f"Error processing the file: {str(e)}")
905
+
906
def replace_student_names_with_initials(df):
    """Replace student names in 'Student Attendance [Name]' headers with initials.

    Each whitespace-separated part of the name contributes its first
    character, so ``Student Attendance [Cheyne LeVesseur]`` becomes
    ``Student Attendance [CL]``. When two students share the same initials,
    later ones receive a numeric suffix (``CL2``, ``CL3``, ...) so that column
    names stay unique. Headers that do not match the pattern, including
    non-string headers, are kept unchanged.

    Args:
        df: DataFrame whose column labels are rewritten in place.

    Returns:
        The same DataFrame object, with updated column labels.
    """
    updated_columns = []
    used_labels = set()
    for col in df.columns:
        # Only string headers can carry a student name; numbers or dates
        # read from the sheet are passed through untouched.
        match = (
            re.match(r'Student Attendance \[(.+?)\]', col)
            if isinstance(col, str)
            else None
        )
        if match is None:
            updated_columns.append(col)
            continue

        # First character of every name part ("Cheyne" -> "C").
        initials = ''.join(part[0] for part in match.group(1).split())

        # Disambiguate repeated initials without revealing more of the name.
        label = initials
        suffix = 2
        while label in used_labels:
            label = f'{initials}{suffix}'
            suffix += 1
        used_labels.add(label)

        updated_columns.append(f'Student Attendance [{label}]')

    df.columns = updated_columns
    return df
930
+
931
def compute_intervention_statistics(df):
    """Summarize how many intervention sessions were held.

    Args:
        df: DataFrame containing the ``INTERVENTION_COLUMN`` column; every
            row counts as one available day.

    Returns:
        A one-row DataFrame with the total number of days, sessions held
        ('yes'), sessions not held ('no'), and the percentage of days on
        which a session was held (0 when there are no rows).
    """
    # Convert to text first: the .str accessor raises on a non-string column
    # (for example a fully blank column read as float NaN). Blank cells
    # become 'nan', which matches neither 'yes' nor 'no'.
    answers = df[INTERVENTION_COLUMN].astype(str).str.strip().str.lower()

    total_days = len(df)
    sessions_held = int((answers == 'yes').sum())
    sessions_not_held = int((answers == 'no').sum())

    # Intervention Frequency (%), guarded against an empty sheet
    if total_days > 0:
        intervention_frequency = round((sessions_held / total_days) * 100, 2)
    else:
        intervention_frequency = 0

    # One-row table, ready for display
    return pd.DataFrame({
        'Total Number of Days Available': [total_days],
        'Intervention Sessions Held': [sessions_held],
        'Intervention Sessions Not Held': [sessions_not_held],
        'Intervention Frequency (%)': [intervention_frequency],
    })
954
+
955
def plot_intervention_statistics(intervention_stats):
    """Draw, display and return a stacked bar of sessions held vs not held.

    Args:
        intervention_stats: DataFrame whose first row provides the
            'Intervention Sessions Held' and 'Intervention Sessions Not Held'
            counts.

    Returns:
        The matplotlib figure that was rendered with ``st.pyplot``.
    """
    held_count = intervention_stats['Intervention Sessions Held'].iloc[0]
    not_held_count = intervention_stats['Intervention Sessions Not Held'].iloc[0]

    figure, axes = plt.subplots()
    category = ['Intervention Sessions']

    # "Not Held" forms the base of the stack; "Held" sits on top of it.
    axes.bar(category, [not_held_count], label='Not Held', color='#358E66')
    axes.bar(category, [held_count], bottom=[not_held_count], label='Held', color='#91D6B8')

    # Write each count in the vertical middle of its own segment.
    segment_labels = (
        (not_held_count / 2, not_held_count, 'white'),
        (not_held_count + held_count / 2, held_count, 'black'),
    )
    for y_position, count, text_color in segment_labels:
        axes.text(0, y_position, str(count), ha='center', va='center', color=text_color)

    axes.set_ylabel('Number of Sessions')
    axes.set_title('Intervention Sessions Held vs Not Held')
    axes.legend()

    st.pyplot(figure)

    return figure
975
+
976
def compute_student_metrics(df):
    """Compute attendance and engagement percentages for every student column.

    Only rows where the intervention was held ('yes') are considered. A
    student counts as present when the cell holds one of the three engagement
    statuses; anything else, including a blank cell, counts as absent.
    Engagement is the average weight of the attended sessions
    (engaged = 1, partially engaged = 0.5, not engaged = 0) as a percentage.

    Args:
        df: DataFrame with the ``INTERVENTION_COLUMN`` column and zero or more
            columns whose header starts with 'Student Attendance'.

    Returns:
        DataFrame with columns 'Student', 'Attendance (%)' and
        'Engagement (%)', one row per student column. The columns are present
        even when the sheet has no student columns.
    """
    # Convert to text before using .str so a non-string column cannot raise.
    held_mask = df[INTERVENTION_COLUMN].astype(str).str.strip().str.lower() == 'yes'
    intervention_df = df[held_mask]
    intervention_sessions_held = len(intervention_df)

    # Weight of each recognized status; its keys double as the list of
    # values that count as "attended".
    engagement_weights = {
        ENGAGED_STR: 1.0,
        PARTIALLY_ENGAGED_STR: 0.5,
        NOT_ENGAGED_STR: 0.0,
    }
    attended_statuses = list(engagement_weights)

    rows = []
    # Select columns by position so that repeated header names still yield
    # exactly one Series per student column.
    for position, col in enumerate(df.columns):
        if not (isinstance(col, str) and col.startswith('Student Attendance')):
            continue

        student_name = col.replace('Student Attendance [', '').replace(']', '').strip()

        # Trim cell text the same way the intervention column is trimmed.
        # Blank cells turn into 'nan', which is not a recognized status and
        # is therefore treated as absent.
        responses = intervention_df.iloc[:, position].astype(str).str.strip()
        attended = responses[responses.isin(attended_statuses)]
        sessions_attended = len(attended)

        # Attendance (%)
        if intervention_sessions_held > 0:
            attendance_pct = round((sessions_attended / intervention_sessions_held) * 100, 2)
        else:
            attendance_pct = 0

        # Engagement (%), computed over attended sessions only
        if sessions_attended > 0:
            engagement_total = attended.map(engagement_weights).sum()
            engagement_pct = round((engagement_total / sessions_attended) * 100, 2)
        else:
            engagement_pct = 0

        rows.append({
            'Student': student_name,
            'Attendance (%)': attendance_pct,
            'Engagement (%)': engagement_pct,
        })

    # Explicit columns keep the schema stable when there are no students.
    return pd.DataFrame(rows, columns=['Student', 'Attendance (%)', 'Engagement (%)'])
1036
+
1037
def plot_student_metrics(student_metrics_df):
    """Draw, display and return a line chart of attendance and engagement.

    Args:
        student_metrics_df: DataFrame with 'Student', 'Attendance (%)' and
            'Engagement (%)' columns.

    Returns:
        The matplotlib figure that was rendered with ``st.pyplot``.
    """
    figure, axes = plt.subplots()

    # One line per metric; the legend label is the column name itself.
    students = student_metrics_df['Student']
    series_colors = (
        ('Attendance (%)', '#005288'),
        ('Engagement (%)', '#3AB0FF'),
    )
    for column_name, line_color in series_colors:
        axes.plot(students, student_metrics_df[column_name], marker='o', color=line_color, label=column_name)

    axes.set_xlabel('Student')
    axes.set_ylabel('Percentage (%)')
    axes.set_title('Student Attendance and Engagement Metrics')
    axes.legend()
    plt.xticks(rotation=45)

    st.pyplot(figure)

    return figure
1054
+
1055
def download_chart(fig, filename):
    """Offer *fig* as a PNG download under the name *filename*.

    Args:
        fig: Figure object providing ``savefig``.
        filename: File name suggested to the browser for the download.
    """
    # Render the figure into memory rather than onto disk.
    png_stream = io.BytesIO()
    fig.savefig(png_stream, format='png')
    # Rewind so the button reads the image from its first byte.
    png_stream.seek(0)
    st.download_button(
        label="Download Chart",
        data=png_stream,
        file_name=filename,
        mime='image/png',
    )
1064
+
1065
def download_llm_output(content, filename):
    """Offer the text *content* as a UTF-8 plain-text download.

    Args:
        content: Text to export.
        filename: File name suggested to the browser for the download.
    """
    # A BytesIO built from initial bytes is already positioned at the start.
    text_stream = io.BytesIO(content.encode('utf-8'))
    st.download_button(
        label="Download LLM Output",
        data=text_stream,
        file_name=filename,
        mime='text/plain',
    )
1072
+
1073
def prepare_llm_input(student_metrics_df):
    """Build the analysis prompt sent to the language model.

    The prompt is assembled from explicit lines rather than an indented
    triple-quoted template, so its layout does not depend on how this
    function is indented in the source. In particular, every line of the
    multi-line metrics table starts at the same column.

    Args:
        student_metrics_df: DataFrame of per-student metrics; it is rendered
            with ``to_string(index=False)``.

    Returns:
        The prompt text: an introduction, the metrics table, and the two
        numbered requests, separated by blank lines.
    """
    # Convert the student metrics DataFrame to a string
    metrics_str = student_metrics_df.to_string(index=False)

    sections = [
        "Based on the following student metrics:",
        metrics_str,
        "Provide:",
        "1. Notes and Key Takeaways: Summarize the data, highlight students with the lowest and highest attendance and engagement percentages, identify students who may need adjustments to their intervention due to low attendance or engagement, and highlight students who are showing strong performance.",
        "2. Recommendations and Next Steps: Provide interpretations based on the analysis and suggest possible next steps or strategies to improve student outcomes.",
    ]
    # Leading and trailing newline mirror the shape of the original template.
    return "\n" + "\n\n".join(sections) + "\n"
1088
+
1089
def prompt_response_from_hf_llm(llm_input):
    """Send *llm_input* to the chat model and return the complete reply text.

    The request is streamed and the text fragments are concatenated.

    Args:
        llm_input: Prompt sent as a single user message.

    Returns:
        The concatenated response with surrounding whitespace removed.
    """
    response = client.chat.completions.create(
        model="meta-llama/Llama-3.1-70B-Instruct",
        messages=[
            {"role": "user", "content": llm_input}
        ],
        stream=True,
        temperature=0.5,
        max_tokens=1024,
        top_p=0.7
    )

    # Collect the streamed fragments. A chunk may carry no choices or a
    # delta whose content is None; concatenating None onto a str would
    # raise TypeError, so such chunks are skipped.
    fragments = []
    for message in response:
        if not message.choices:
            continue
        fragment = message.choices[0].delta.content
        if fragment:
            fragments.append(fragment)

    return "".join(fragments).strip()
1108
+
1109
# Entry point: build the page only when this file is executed as the main
# module, not when it is imported.
if __name__ == '__main__':
    main()