turanhasan committed on
Commit
8e34cf7
·
verified ·
1 Parent(s): 533ff7a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +234 -0
app.py ADDED
@@ -0,0 +1,234 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import google.generativeai as genai
4
+ from google.ai.generativelanguage_v1beta.types import content
5
+ import json
6
+ from tempfile import NamedTemporaryFile
7
+
8
# Seed the session-state entries the rest of the script reads. A key that is
# already present is left untouched, so stored values are never overwritten.
for state_key, initial_value in (
    ("chat_history", []),
    ("diarization_output", None),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Page-level settings: page title and wide layout.
st.set_page_config(page_title="Meeting Audio Analyzer", layout="wide")
20
+
21
# Sidebar controls: analysis settings, audio upload, and the Diarize /
# Summarize actions. Results are stored in st.session_state
# (`diarization_output`, `chat_history`) for the rest of the script to read.
with st.sidebar:
    st.title("Settings")

    # Language the summary (and later answers) should be written in
    language = st.selectbox(
        "Select language",
        ["English", "Turkish", "Spanish", "French", "German"],
    )

    # Speaker count; passed to the model as a hint inside the diarization prompt
    num_speakers = st.number_input(
        "Enter number of speakers",
        min_value=1,
        max_value=10,
        value=2,
    )

    # File upload
    uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])

    # Action buttons
    # NOTE(review): the original indentation was lost in the diff rendering;
    # both buttons are assumed to live under `if uploaded_file:` -- confirm.
    if uploaded_file:
        if st.button("Diarize"):
            with st.spinner("Processing audio..."):
                # Derive the temp-file suffix and MIME type from the uploaded
                # file name so WAV uploads are not mislabelled as MP3.
                audio_mime_types = {".mp3": "audio/mpeg", ".wav": "audio/wav"}
                extension = os.path.splitext(uploaded_file.name)[1].lower()
                if extension not in audio_mime_types:
                    # Fall back to the previous hard-coded behaviour.
                    extension = ".mp3"
                mime_type = audio_mime_types[extension]

                # Save uploaded file temporarily; delete=False because the
                # path is handed to genai.upload_file after the handle closes.
                with NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
                    tmp_file.write(uploaded_file.getvalue())
                    temp_path = tmp_file.name

                try:
                    # Configure Gemini, failing with a readable message
                    # instead of a bare KeyError when the key is missing.
                    api_key = os.environ.get("GEMINI_API_KEY")
                    if not api_key:
                        raise RuntimeError(
                            "GEMINI_API_KEY environment variable is not set"
                        )
                    genai.configure(api_key=api_key)

                    # Upload to Gemini
                    gemini_file = genai.upload_file(temp_path, mime_type=mime_type)

                    # Create diarization model and process
                    diarization_config = {
                        "temperature": 0.5,
                        "top_p": 0.95,
                        "top_k": 40,
                        "max_output_tokens": 8192,
                        "response_mime_type": "application/json",
                    }

                    diarization_model = genai.GenerativeModel(
                        model_name="gemini-2.0-flash-exp",
                        generation_config=diarization_config,
                        safety_settings={
                            'HATE': 'BLOCK_NONE',
                            'HARASSMENT': 'BLOCK_NONE',
                            'SEXUAL': 'BLOCK_NONE',
                            'DANGEROUS': 'BLOCK_NONE',
                        },
                    )

                    # The audio file is supplied as the first user turn; the
                    # instructions follow as the next message.
                    chat_session = diarization_model.start_chat(
                        history=[{"role": "user", "parts": [gemini_file]}]
                    )

                    response = chat_session.send_message(
                        "Generate meeting diarization of the meeting audio record provided in the file. "
                        "The meeting may be in a foreign language, expect a mixture of words in local language "
                        f"and words in english. Provided audio has {num_speakers} speakers. "
                        "Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
                        "Provide a structured JSON output. timestamp (hh:mm:ss), speaker (name only), "
                        "speech (transcription). Do not transcribe filler words."
                    )

                    json_data = json.loads(response.text)
                    if not isinstance(json_data, list):
                        raise ValueError(
                            "expected a JSON array of utterances from the model"
                        )

                    # One "timestamp - speaker: speech" paragraph per entry.
                    # Missing fields render as empty text rather than
                    # aborting the whole transcript.
                    formatted_output = "".join(
                        f"{item.get('timestamp', '')} - {item.get('speaker', '')}: "
                        f"{item.get('speech', '')}\n\n"
                        for item in json_data
                        if isinstance(item, dict)
                    )

                    st.session_state.diarization_output = formatted_output

                except Exception as e:
                    st.error(f"Error processing audio: {str(e)}")
                finally:
                    # Clean up temp file
                    os.unlink(temp_path)

        if st.button("Summarize"):
            if not st.session_state.diarization_output:
                # Tell the user why nothing happens instead of ignoring the click.
                st.warning("Run diarization before requesting a summary.")
            else:
                with st.spinner("Generating summary..."):
                    try:
                        # Configure summarization model; the schema asks for a
                        # JSON object with a single required "summary" string.
                        summarization_config = {
                            "temperature": 0.25,
                            "top_p": 0.95,
                            "top_k": 40,
                            "max_output_tokens": 8192,
                            "response_schema": content.Schema(
                                type=content.Type.OBJECT,
                                enum=[],
                                required=["summary"],
                                properties={
                                    "summary": content.Schema(
                                        type=content.Type.STRING,
                                    ),
                                },
                            ),
                            "response_mime_type": "application/json",
                        }

                        summarization_model = genai.GenerativeModel(
                            model_name="gemini-2.0-flash-exp",
                            generation_config=summarization_config,
                            safety_settings={
                                'HATE': 'BLOCK_NONE',
                                'HARASSMENT': 'BLOCK_NONE',
                                'SEXUAL': 'BLOCK_NONE',
                                'DANGEROUS': 'BLOCK_NONE',
                            },
                        )

                        # The formatted transcript is supplied as the first user turn.
                        chat_session = summarization_model.start_chat(
                            history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
                        )

                        response = chat_session.send_message(
                            "Generate a detailed summarization of the meeting, provide information on "
                            "the topic of the meeting, agenda, things discussed and future plans if any mentioned. "
                            f"Provide structured output with only one tag 'summary'. Generate response in {language}."
                        )

                        json_data = json.loads(response.text)
                        summary = json_data.get('summary', "No summary found.")
                        # Stored under the "Summary" role so the chat-history
                        # renderer can tell it apart from User/Bot messages.
                        st.session_state.chat_history.append(("Summary", summary))

                    except Exception as e:
                        st.error(f"Error generating summary: {str(e)}")
155
+
156
# Main chat interface: shows the transcript, the accumulated chat history,
# and a question box that asks the model about the meeting.
st.title("Meeting Audio Analyzer")

# Diarization output in collapsible section
if st.session_state.diarization_output:
    with st.expander("Diarization Output", expanded=False):
        st.text_area("Transcript", st.session_state.diarization_output, height=300)

# Chat history: entries are (role, message) tuples; any role other than
# "User" or "Bot" is rendered as a meeting summary.
for role, message in st.session_state.chat_history:
    if role in ("User", "Bot"):
        st.write(f"{role}: {message}")
    else:  # Summary
        st.write("Meeting Summary:")
        st.write(message)

# Question input (only offered once a transcript exists)
if st.session_state.diarization_output:
    question = st.text_input("Type in your question")
    if st.button("Send"):
        # Ignore empty and whitespace-only questions.
        if question.strip():
            # Add user question to chat history
            st.session_state.chat_history.append(("User", question))

            with st.spinner("Generating response..."):
                try:
                    # Configure QnA model; the schema asks for a JSON object
                    # with a single required "answer" string.
                    qna_config = {
                        "temperature": 0.25,
                        "top_p": 0.95,
                        "top_k": 40,
                        "max_output_tokens": 8192,
                        "response_schema": content.Schema(
                            type=content.Type.OBJECT,
                            enum=[],
                            required=["answer"],
                            properties={
                                "answer": content.Schema(
                                    type=content.Type.STRING,
                                ),
                            },
                        ),
                        "response_mime_type": "application/json",
                    }

                    qna_model = genai.GenerativeModel(
                        model_name="gemini-2.0-flash-exp",
                        generation_config=qna_config,
                        safety_settings={
                            'HATE': 'BLOCK_NONE',
                            'HARASSMENT': 'BLOCK_NONE',
                            'SEXUAL': 'BLOCK_NONE',
                            'DANGEROUS': 'BLOCK_NONE',
                        },
                    )

                    # The formatted transcript is supplied as the first user turn.
                    chat_session = qna_model.start_chat(
                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
                    )

                    # The trailing space after the language sentence keeps the
                    # two prompt sentences from running together.
                    response = chat_session.send_message(
                        f"Answer the following question based on the meeting: {question}. "
                        f"Generate response in {language}. "
                        "Provide structured output with only one tag 'answer'."
                    )

                    json_data = json.loads(response.text)
                    answer = json_data.get('answer', "No answer found.")

                except Exception as e:
                    st.error(f"Error generating answer: {str(e)}")
                else:
                    # Add bot response to chat history
                    st.session_state.chat_history.append(("Bot", answer))

                    # Rerun to update the chat display. Kept outside the try
                    # body so the broad handler above can never intercept it.
                    st.rerun()