BoraAk / app.py
turanhasan's picture
Update app.py
1f244c9 verified
raw
history blame
19.9 kB
import streamlit as st
import os
import google.generativeai as genai
from google.ai.generativelanguage_v1beta.types import content
import json
from tempfile import NamedTemporaryFile
from datetime import datetime
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
import smtplib
from email.mime.text import MIMEText
from streamlit_mic_recorder import mic_recorder
import wave
# Install streamlit-mic-recorder if not already installed:
# pip install streamlit-mic-recorder
# Seed st.session_state with a default for every key the app uses.
# A key is written only when it is not already present, so values that are
# already stored are left untouched.
_session_defaults = {
    'chat_history': [],
    'diarization_output': None,
    'uploaded_file': None,
    'language': "English",
    'num_speakers': 2,
    'summary_output': None,
    'key_decisions_output': None,
    'email_sent_message': "",
    'recorded_audio': None,
}
for _state_key, _default_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_value
# Page-level settings: the page title and a wide layout.
st.set_page_config(layout="wide", page_title="AI Meeting Notes & Reporting")
# Function to generate PDF report
def generate_pdf_report(meeting_date, summary, key_decisions, transcription):
    """Render the meeting report as a letter-size PDF and return its bytes.

    The report contains a title, the meeting date, a "Summary" section and,
    when the corresponding text is non-empty, "Key Decisions" and
    "Transcription" sections. Text wider than the printable area is wrapped,
    and a new page is started whenever the next line would fall below the
    bottom margin.

    Args:
        meeting_date: Object providing ``strftime``; formatted as YYYY-MM-DD.
        summary: Summary text, one paragraph per newline-separated line.
            ``None`` is treated as an empty summary.
        key_decisions: Newline-separated decisions; each non-blank line is
            rendered as a "- " bullet. The section is skipped when falsy.
        transcription: Transcript text; the section is skipped when falsy.

    Returns:
        The PDF document as ``bytes``.

    NOTE(review): only the built-in Helvetica fonts are used; confirm that
    they can render the characters of every report language offered in the UI.
    """
    # Function-scope import so the file's top-level import block is unchanged.
    from reportlab.lib.utils import simpleSplit

    left_margin = inch
    page_top = 10.5 * inch
    bottom_margin = 1 * inch
    # Printable width: page width minus a one-inch margin on each side.
    usable_width = letter[0] - 2 * inch

    buffer = io.BytesIO()
    pdf = canvas.Canvas(buffer, pagesize=letter)
    y_position = page_top

    def draw_line(text, font_name, font_size, line_height):
        """Draw one line at the cursor, breaking to a new page first if needed."""
        nonlocal y_position
        if y_position < bottom_margin:
            pdf.showPage()
            y_position = page_top
        # Set the font for every line: a page break discards the current font.
        pdf.setFont(font_name, font_size)
        pdf.drawString(left_margin, y_position, text)
        y_position -= line_height

    def draw_wrapped(text, font_name, font_size, line_height):
        """Draw multi-line text, wrapping each line to the printable width."""
        for raw_line in text.split('\n'):
            # simpleSplit yields nothing for a blank line; keep the blank
            # line so paragraph spacing in the source text is preserved.
            pieces = simpleSplit(raw_line, font_name, font_size, usable_width) or [""]
            for piece in pieces:
                draw_line(piece, font_name, font_size, line_height)

    # Header: title at the top of the page, date half an inch below it.
    draw_line("Meeting Report", "Helvetica-Bold", 16, 0.5 * inch)
    draw_line(f"Date: {meeting_date.strftime('%Y-%m-%d')}", "Helvetica", 12, 0.5 * inch)

    draw_line("Summary:", "Helvetica-Bold", 12, 0.3 * inch)
    draw_wrapped(summary or "", "Helvetica", 10, 0.2 * inch)

    if key_decisions:
        draw_line("Key Decisions:", "Helvetica-Bold", 12, 0.3 * inch)
        for decision in key_decisions.strip().split('\n'):
            decision = decision.strip()
            if decision:
                draw_wrapped(f"- {decision}", "Helvetica", 10, 0.2 * inch)

    if transcription:
        draw_line("Transcription:", "Helvetica-Bold", 12, 0.3 * inch)
        # Smaller font and tighter spacing: the transcript is the longest part.
        draw_wrapped(transcription, "Helvetica", 8, 0.15 * inch)

    pdf.save()
    pdf_out = buffer.getvalue()
    buffer.close()
    return pdf_out
def send_email_report(email_address, meeting_date, summary, key_decisions, transcription):
    """Send the meeting report as a plain-text email over SMTP with STARTTLS.

    Connection settings are read from the environment variables SMTP_SERVER,
    SMTP_PORT, SMTP_USERNAME and SMTP_PASSWORD. The SMTP username is also
    used as the sender address.

    Args:
        email_address: Recipient address.
        meeting_date: Object providing ``strftime``; formatted as YYYY-MM-DD.
        summary: Summary text placed in the message body.
        key_decisions: Key-decisions text placed in the message body.
        transcription: Transcript text placed in the message body.

    Returns:
        A ``(success, message)`` tuple; ``message`` is suitable for display.
        Failures are reported through the tuple rather than raised.
    """
    smtp_server = os.environ.get("SMTP_SERVER")
    smtp_port = os.environ.get("SMTP_PORT")
    smtp_username = os.environ.get("SMTP_USERNAME")
    smtp_password = os.environ.get("SMTP_PASSWORD")
    sender_email = smtp_username  # For simplicity, assuming sender is the same as username

    if not all([smtp_server, smtp_port, smtp_username, smtp_password]):
        return False, "SMTP configuration is missing. Please set environment variables: SMTP_SERVER, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD."

    # Environment values are strings; reject a non-numeric port up front
    # instead of letting it fail somewhere inside the connection attempt.
    try:
        port_number = int(smtp_port)
    except ValueError:
        return False, f"SMTP configuration is invalid: SMTP_PORT must be an integer, got {smtp_port!r}."

    # The recipient is user-supplied and goes into a message header, so a
    # value containing a line break must not be accepted.
    recipient = (email_address or "").strip()
    if not recipient or "\r" in recipient or "\n" in recipient:
        return False, "Email sending failed: the recipient address is empty or contains a line break."

    date_text = meeting_date.strftime('%Y-%m-%d')
    subject = f"Meeting Report - {date_text}"
    body = f"Meeting Date: {date_text}\n\nSummary:\n{summary}\n\nKey Decisions:\n{key_decisions}\n\nTranscription:\n{transcription}"

    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = recipient

    try:
        # The timeout keeps an unreachable server from blocking indefinitely.
        with smtplib.SMTP(smtp_server, port_number, timeout=30) as server:
            server.starttls()
            server.login(smtp_username, smtp_password)
            server.sendmail(sender_email, [recipient], msg.as_string())
        return True, "Email sent successfully!"
    except Exception as e:
        # Deliberately broad: callers expect a result tuple, never an exception.
        return False, f"Email sending failed: {e}"
# Main UI
st.title("AI Meeting Notes & Reporting")

# Meeting Date & Time
meeting_date_time = st.date_input("Meeting Date & Time", datetime.today())

# Number of speakers; the chosen value is written back to session state.
num_speakers = st.number_input(
    "Number of speakers",
    min_value=1,
    max_value=10,
    value=st.session_state.num_speakers,
)
st.session_state.num_speakers = num_speakers

# Language selection; pre-select the stored language, or the first entry
# when the stored value is not one of the choices.
_language_choices = ["English", "Turkish", "Spanish", "French", "German"]
if st.session_state.language in _language_choices:
    _language_index = _language_choices.index(st.session_state.language)
else:
    _language_index = 0
language = st.selectbox("Language of report", _language_choices, index=_language_index)
st.session_state.language = language

# File upload
uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])

# Voice recording
audio_bytes = mic_recorder(start_prompt="Record", stop_prompt="Stop recording", key='recorder')
if audio_bytes:
    # The recorder result is either a dict carrying the audio under "bytes"
    # or (fallback) the raw audio itself.
    if isinstance(audio_bytes, dict) and "bytes" in audio_bytes:
        _recorded_payload = audio_bytes["bytes"]
    else:
        _recorded_payload = audio_bytes
    st.audio(_recorded_payload, format="audio/wav")
    st.session_state.recorded_audio = _recorded_payload
# Diarization, Summarization and Key Decisions logic - Automatically after upload or record
# This section only decides WHETHER there is new audio to process; a newly
# uploaded file takes priority over a recording.
process_audio = False
audio_source_indicator = ""
if uploaded_file and uploaded_file != st.session_state.uploaded_file:  # Check if a new file is uploaded
    st.session_state.uploaded_file = uploaded_file  # Update session state
    st.session_state.recorded_audio = None  # Reset recorded audio
    process_audio = True
    audio_source_indicator = f"Processing uploaded file: {uploaded_file.name}"
elif (
    st.session_state.recorded_audio
    # Compare with the recording that was processed last. Despite the key
    # name, the stored value is the audio itself (see the assignment below),
    # so it is compared directly; an unchanged recording is not processed again.
    and st.session_state.recorded_audio != st.session_state.get('last_recorded_audio_hash')
):
    st.session_state.last_recorded_audio_hash = st.session_state.recorded_audio  # Remember what was processed
    st.session_state.uploaded_file = None  # Reset uploaded file
    process_audio = True
    audio_source_indicator = "Processing recorded audio"
if process_audio:
    # Discard everything derived from the previously processed audio.
    st.session_state.diarization_output = None  # Reset previous diarization output
    st.session_state.summary_output = None  # Reset previous summary output
    st.session_state.key_decisions_output = None  # Reset previous key decisions output
    st.session_state.chat_history = []  # Clear chat history for new file
    st.session_state.email_sent_message = ""  # Clear email sent message

    # Safety settings shared by every model created in this section.
    _safety_settings = {
        'HATE': 'BLOCK_NONE',
        'HARASSMENT': 'BLOCK_NONE',
        'SEXUAL': 'BLOCK_NONE',
        'DANGEROUS': 'BLOCK_NONE'
    }

    def _single_field_config(field_name):
        """Build a generation config requesting a JSON object with one required string field."""
        return {
            "temperature": 0.25,
            "top_p": 0.95,
            "top_k": 40,
            "max_output_tokens": 8192,
            "response_schema": content.Schema(
                type=content.Type.OBJECT,
                enum=[],
                required=[field_name],
                properties={
                    field_name: content.Schema(
                        type=content.Type.STRING,
                    ),
                },
            ),
            "response_mime_type": "application/json",
        }

    def _ask_model(context_part, generation_config, prompt):
        """Start a chat seeded with context_part, send prompt, and return the reply parsed as JSON."""
        model = genai.GenerativeModel(
            model_name="gemini-2.0-flash-exp",
            generation_config=generation_config,
            safety_settings=_safety_settings,
        )
        session = model.start_chat(
            history=[{"role": "user", "parts": [context_part]}]
        )
        return json.loads(session.send_message(prompt).text)

    with st.spinner(f"Processing audio and generating summary and key decisions... {audio_source_indicator}"):
        temp_path = None
        try:
            # Configure Gemini; fail with a readable message when the key is absent.
            api_key = os.environ.get("GEMINI_API_KEY")
            if not api_key:
                raise RuntimeError("GEMINI_API_KEY environment variable is not set.")
            genai.configure(api_key=api_key)

            if st.session_state.uploaded_file:  # Process uploaded file
                source_file = st.session_state.uploaded_file
                # The uploader accepts mp3 and wav; label the temp file and
                # the upload according to the actual extension.
                extension = os.path.splitext(source_file.name)[1].lower()
                if extension == ".wav":
                    mime_type = "audio/wav"
                else:
                    extension = ".mp3"
                    mime_type = "audio/mpeg"
                with NamedTemporaryFile(delete=False, suffix=extension) as tmp_file:
                    tmp_file.write(source_file.getvalue())
                    temp_path = tmp_file.name
                # Upload after the with-block so the data is flushed and the handle is closed.
                gemini_file = genai.upload_file(temp_path, mime_type=mime_type)
            elif st.session_state.recorded_audio:  # Process recorded audio
                with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file_wav:
                    tmp_file_wav.write(st.session_state.recorded_audio)
                    temp_path = tmp_file_wav.name
                gemini_file = genai.upload_file(temp_path, mime_type="audio/wav")  # Assuming WAV is directly compatible

            # --- Diarization ---
            diarization_config = {
                "temperature": 0.5,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
                "response_mime_type": "application/json",
            }
            json_data_diarization = _ask_model(
                gemini_file,
                diarization_config,
                f"Generate meeting diarization of the meeting audio record provided in the file. "
                f"The meeting may be in a foreign language, expect a mixture of words in local language "
                f"and words in english. Provided audio has {num_speakers} speakers. "
                f"Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
                f"Provide a structured JSON output. timestamp (hh:mm:ss), speaker (name only), "
                f"speech (transcription). Do not transcribe filler words."
            )
            # One "timestamp - speaker: speech" paragraph per entry.
            # NOTE(review): assumes the reply is a list of objects with these
            # three keys; any other shape ends up in the except branch below.
            st.session_state.diarization_output = "".join(
                f"{item['timestamp']} - {item['speaker']}: {item['speech']}\n\n"
                for item in json_data_diarization
            )

            # --- Summarization ---
            json_data_summarization = _ask_model(
                st.session_state.diarization_output,
                _single_field_config("summary"),
                f"Generate a detailed summarization of the meeting, provide information on "
                f"the topic of the meeting, agenda, things discussed and future plans if any mentioned. "
                f"Provide structured output with only one tag 'summary'. Generate response in {language}."
            )
            summary = json_data_summarization.get('summary', "No summary found.")
            st.session_state.summary_output = summary
            st.session_state.chat_history.append(("Summary", summary))

            # --- Key Decisions ---
            json_data_key_decisions = _ask_model(
                st.session_state.diarization_output,
                _single_field_config("key_decisions"),
                f"Identify and list the key decisions made during the meeting. "
                f"Generate response in {language}."
            )
            key_decisions = json_data_key_decisions.get('key_decisions', "No key decisions found.")
            st.session_state.key_decisions_output = key_decisions
        except Exception as e:
            # Top-level boundary of the pipeline: report the failure in the UI.
            st.error(f"Error processing audio: {str(e)}")
        finally:
            # Clean up temp file if created
            if temp_path:
                os.unlink(temp_path)
# Diarization output display
_transcript_text = st.session_state.diarization_output
if _transcript_text:
    st.subheader("Diarization Output")
    st.text_area("Transcript", _transcript_text, height=300)

# Summary output
_summary_text = st.session_state.summary_output
if _summary_text:
    st.subheader("Summary")
    st.write(_summary_text)

# Key decisions output: one markdown bullet per non-blank line.
_decisions_text = st.session_state.key_decisions_output
if _decisions_text:
    st.subheader("Key decisions")
    for _bullet in filter(None, map(str.strip, _decisions_text.strip().split('\n'))):
        st.markdown(f"- {_bullet}")

# Generate PDF Report button
if st.button("Generate PDF report"):
    if not (_summary_text and _decisions_text and _transcript_text):
        # Nothing to report until all three outputs exist.
        st.warning("Please upload or record audio to generate report.")
    else:
        pdf_bytes = generate_pdf_report(
            meeting_date_time,
            _summary_text,
            _decisions_text,
            _transcript_text,
        )
        st.download_button(
            label="Download PDF Report",
            data=pdf_bytes,
            file_name="meeting_report.pdf",
            mime="application/pdf",
        )
# Q&A section (available once a transcript exists)
if st.session_state.diarization_output:
    st.subheader("Question Answering")
    question = st.text_input("Type in your question")

    if st.button("Send") and question:
        # Record the question first, then ask the model for the answer.
        st.session_state.chat_history.append(("User", question))
        with st.spinner("Generating response..."):
            try:
                # Reply format: a JSON object with one required string field "answer".
                answer_schema = content.Schema(
                    type=content.Type.OBJECT,
                    enum=[],
                    required=["answer"],
                    properties={"answer": content.Schema(type=content.Type.STRING)},
                )
                qna_config = {
                    "temperature": 0.25,
                    "top_p": 0.95,
                    "top_k": 40,
                    "max_output_tokens": 8192,
                    "response_schema": answer_schema,
                    "response_mime_type": "application/json",
                }
                qna_model = genai.GenerativeModel(
                    model_name="gemini-2.0-flash-exp",
                    generation_config=qna_config,
                    safety_settings=dict.fromkeys(
                        ('HATE', 'HARASSMENT', 'SEXUAL', 'DANGEROUS'), 'BLOCK_NONE'
                    ),
                )

                # The transcript is supplied as the first user turn of the chat.
                transcript_turn = {"role": "user", "parts": [st.session_state.diarization_output]}
                chat_session_qna = qna_model.start_chat(history=[transcript_turn])
                qna_prompt = (
                    f"Answer the following question based on the meeting: {question}. Generate response in {language}."
                    f"Provide structured output with only one tag 'answer'."
                )
                response_qna = chat_session_qna.send_message(qna_prompt)

                answer = json.loads(response_qna.text).get('answer', "No answer found.")
                st.session_state.chat_history.append(("Bot", answer))
                st.rerun()  # Rerun to update the chat display
            except Exception as e:
                st.error(f"Error generating answer: {str(e)}")

    # Chat history display for Q&A; entries with any other role are not shown.
    _role_labels = {"User": "Question", "Bot": "Answer"}
    for role, message in st.session_state.chat_history:
        if role in _role_labels:
            st.write(f"**{_role_labels[role]}**: {message}")
# Email input and Send Report button
st.subheader("Share Report")
email_address = st.text_input("Email address:")
send_button = st.button("Send Report")

if send_button:
    # Summary, key decisions and transcript, in the order send_email_report expects.
    _report_parts = (
        st.session_state.summary_output,
        st.session_state.key_decisions_output,
        st.session_state.diarization_output,
    )
    if not email_address:
        st.warning("Please enter an email address.")
    elif not all(_report_parts):
        st.warning("Please upload or record audio and generate report first.")
    else:
        success, message = send_email_report(email_address, meeting_date_time, *_report_parts)
        # Show the outcome with the matching status box.
        _notify = st.success if success else st.error
        _notify(message)