import streamlit as st
import os
import google.generativeai as genai
from google.ai.generativelanguage_v1beta.types import content
import json
from tempfile import NamedTemporaryFile
from datetime import datetime
import io
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import letter
from reportlab.lib.units import inch
import smtplib
from email.mime.text import MIMEText
from streamlit_mic_recorder import mic_recorder

# Install streamlit-mic-recorder if not already installed:
# pip install streamlit-mic-recorder

# Initialize session state if it doesn't exist
if 'chat_history' not in st.session_state:
    st.session_state.chat_history = []
if 'diarization_output' not in st.session_state:
    st.session_state.diarization_output = None
if 'uploaded_file' not in st.session_state:
    st.session_state.uploaded_file = None
if 'language' not in st.session_state:
    st.session_state.language = "English"
if 'num_speakers' not in st.session_state:
    st.session_state.num_speakers = 2
if 'summary_output' not in st.session_state:
    st.session_state.summary_output = None
if 'key_decisions_output' not in st.session_state:
    st.session_state.key_decisions_output = None
if 'email_sent_message' not in st.session_state:
    st.session_state.email_sent_message = ""
if 'recorded_audio' not in st.session_state:
    st.session_state.recorded_audio = None

# Configuration for the page
st.set_page_config(
    page_title="AI Meeting Notes & Reporting",
    layout="wide"
)


# Function to generate PDF report
def generate_pdf_report(meeting_date, summary, key_decisions, transcription):
    buffer = io.BytesIO()
    p = canvas.Canvas(buffer, pagesize=letter)

    p.setFont("Helvetica-Bold", 16)
    p.drawString(inch, 10.5 * inch, "Meeting Report")

    p.setFont("Helvetica", 12)
    p.drawString(inch, 10 * inch, f"Date: {meeting_date.strftime('%Y-%m-%d')}")

    y_position = 9.5 * inch
    p.setFont("Helvetica-Bold", 12)
    p.drawString(inch, y_position, "Summary:")
    y_position -= 0.3 * inch
    p.setFont("Helvetica", 10)
    summary_lines = summary.split('\n')
    for line in summary_lines:
        p.drawString(inch, y_position, line)
        y_position -= 0.2 * inch
        if y_position < 1 * inch:  # Simple page break to avoid content overflow - improve if needed
            p.showPage()
            y_position = 10.5 * inch
            p.setFont("Helvetica", 10)

    if key_decisions:
        p.setFont("Helvetica-Bold", 12)
        p.drawString(inch, y_position, "Key Decisions:")
        y_position -= 0.3 * inch
        p.setFont("Helvetica", 10)
        key_decisions_list = key_decisions.strip().split('\n')
        for decision in key_decisions_list:
            if decision.strip():
                p.drawString(inch, y_position, f"- {decision.strip()}")
                y_position -= 0.2 * inch
                if y_position < 1 * inch:  # Simple page break
                    p.showPage()
                    y_position = 10.5 * inch
                    p.setFont("Helvetica", 10)

    if transcription:
        p.setFont("Helvetica-Bold", 12)
        p.drawString(inch, y_position, "Transcription:")
        y_position -= 0.3 * inch
        p.setFont("Helvetica", 8)  # Smaller font for transcription
        transcription_lines = transcription.split('\n')
        for line in transcription_lines:
            p.drawString(inch, y_position, line)
            y_position -= 0.15 * inch  # Reduced line spacing for transcription
            if y_position < 1 * inch:  # Simple page break
                p.showPage()
                y_position = 10.5 * inch
                p.setFont("Helvetica", 8)

    p.save()
    pdf_out = buffer.getvalue()
    buffer.close()
    return pdf_out


def send_email_report(email_address, meeting_date, summary, key_decisions, transcription):
    smtp_server = os.environ.get("SMTP_SERVER")
    smtp_port = os.environ.get("SMTP_PORT")
    smtp_username = os.environ.get("SMTP_USERNAME")
    smtp_password = os.environ.get("SMTP_PASSWORD")
    sender_email = smtp_username  # For simplicity, assuming sender is the same as username
    if not all([smtp_server, smtp_port, smtp_username, smtp_password, sender_email]):
        return False, ("SMTP configuration is missing. Please set environment variables: "
                       "SMTP_SERVER, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD.")

    subject = f"Meeting Report - {meeting_date.strftime('%Y-%m-%d')}"
    body = (
        f"Meeting Date: {meeting_date.strftime('%Y-%m-%d')}\n\n"
        f"Summary:\n{summary}\n\n"
        f"Key Decisions:\n{key_decisions}\n\n"
        f"Transcription:\n{transcription}"
    )

    msg = MIMEText(body)
    msg['Subject'] = subject
    msg['From'] = sender_email
    msg['To'] = email_address

    try:
        # SMTP_PORT comes from the environment as a string, so convert it to an int
        with smtplib.SMTP(smtp_server, int(smtp_port)) as server:
            server.starttls()
            server.login(smtp_username, smtp_password)
            server.sendmail(sender_email, email_address, msg.as_string())
        return True, "Email sent successfully!"
    except Exception as e:
        return False, f"Email sending failed: {e}"


# Main UI
st.title("AI Meeting Notes & Reporting")

# Meeting Date & Time
meeting_date_time = st.date_input("Meeting Date & Time", datetime.today())

# Number of speakers
num_speakers = st.number_input("Number of speakers", min_value=1, max_value=10, value=st.session_state.num_speakers)
st.session_state.num_speakers = num_speakers  # Update session state

# Language selection
languages = ["English", "Turkish", "Spanish", "French", "German"]
language = st.selectbox(
    "Language of report",
    languages,
    index=languages.index(st.session_state.language) if st.session_state.language in languages else 0
)
st.session_state.language = language  # Update session state

# File upload
uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])

# Voice recording
audio_bytes = mic_recorder(start_prompt="Record", stop_prompt="Stop recording", key='recorder')
if audio_bytes:
    if isinstance(audio_bytes, dict) and "bytes" in audio_bytes:  # Check if audio_bytes is a dict and has 'bytes' key
        st.audio(audio_bytes["bytes"], format="audio/wav")
        st.session_state.recorded_audio = audio_bytes["bytes"]
    else:  # If not a dict or missing the 'bytes' key, assume raw bytes (fallback, might need adjustment)
        st.audio(audio_bytes, format="audio/wav")
        st.session_state.recorded_audio = audio_bytes

# Diarization, summarization and key-decisions logic - runs automatically after upload or recording
process_audio = False
audio_source_indicator = ""

if uploaded_file and uploaded_file != st.session_state.uploaded_file:  # Check if a new file is uploaded
    st.session_state.uploaded_file = uploaded_file  # Update session state
    st.session_state.recorded_audio = None  # Reset recorded audio
    process_audio = True
    audio_source_indicator = f"Processing uploaded file: {uploaded_file.name}"
elif st.session_state.recorded_audio and st.session_state.recorded_audio != st.session_state.get('last_recorded_audio_hash'):  # Check if new recording
    st.session_state.last_recorded_audio_hash = st.session_state.recorded_audio  # Store the audio bytes to detect new recordings
    st.session_state.uploaded_file = None  # Reset uploaded file
    process_audio = True
    audio_source_indicator = "Processing recorded audio"

if process_audio:
    st.session_state.diarization_output = None  # Reset previous diarization output
    st.session_state.summary_output = None  # Reset previous summary output
    st.session_state.key_decisions_output = None  # Reset previous key decisions output
    st.session_state.chat_history = []  # Clear chat history for new file
    st.session_state.email_sent_message = ""  # Clear email sent message

    with st.spinner(f"Processing audio and generating summary and key decisions... {audio_source_indicator}"):
        temp_path = None
        try:
            # Configure Gemini
            genai.configure(api_key=os.environ["GEMINI_API_KEY"])

            if st.session_state.uploaded_file:  # Process uploaded file
                # Save uploaded file temporarily, keeping its original extension
                suffix = os.path.splitext(st.session_state.uploaded_file.name)[1].lower() or '.mp3'
                with NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
                    tmp_file.write(st.session_state.uploaded_file.getvalue())
                    temp_path = tmp_file.name
                mime_type = "audio/wav" if suffix == '.wav' else "audio/mpeg"
                gemini_file = genai.upload_file(temp_path, mime_type=mime_type)
            elif st.session_state.recorded_audio:  # Process recorded audio
                # Save recorded audio temporarily (WAV from mic_recorder)
                with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file_wav:
                    tmp_file_wav.write(st.session_state.recorded_audio)
                    temp_path = tmp_file_wav.name
                gemini_file = genai.upload_file(temp_path, mime_type="audio/wav")  # Assuming WAV is directly compatible

            # --- Diarization ---
            diarization_config = {
                "temperature": 0.5,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
                "response_mime_type": "application/json",
            }
            diarization_model = genai.GenerativeModel(
                model_name="gemini-2.0-flash-exp",
                generation_config=diarization_config,
                safety_settings={
                    'HATE': 'BLOCK_NONE',
                    'HARASSMENT': 'BLOCK_NONE',
                    'SEXUAL': 'BLOCK_NONE',
                    'DANGEROUS': 'BLOCK_NONE'
                }
            )
            chat_session_diarization = diarization_model.start_chat(
                history=[{"role": "user", "parts": [gemini_file]}]
            )
            response_diarization = chat_session_diarization.send_message(
                f"Generate meeting diarization of the meeting audio record provided in the file. "
                f"The meeting may be in a foreign language; expect a mixture of words in the local language "
                f"and words in English. The provided audio has {num_speakers} speakers. "
                f"Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
                f"Provide a structured JSON output: timestamp (hh:mm:ss), speaker (name only), "
                f"speech (transcription). Do not transcribe filler words."
            )
            json_data_diarization = json.loads(response_diarization.text)

            formatted_output = ""
            for item in json_data_diarization:
                formatted_output += f"{item['timestamp']} - {item['speaker']}: {item['speech']}\n\n"
            st.session_state.diarization_output = formatted_output

            # --- Summarization ---
            summarization_config = {
                "temperature": 0.25,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
                "response_schema": content.Schema(
                    type=content.Type.OBJECT,
                    enum=[],
                    required=["summary"],
                    properties={
                        "summary": content.Schema(
                            type=content.Type.STRING,
                        ),
                    },
                ),
                "response_mime_type": "application/json",
            }
            summarization_model = genai.GenerativeModel(
                model_name="gemini-2.0-flash-exp",
                generation_config=summarization_config,
                safety_settings={
                    'HATE': 'BLOCK_NONE',
                    'HARASSMENT': 'BLOCK_NONE',
                    'SEXUAL': 'BLOCK_NONE',
                    'DANGEROUS': 'BLOCK_NONE'
                }
            )
            chat_session_summarization = summarization_model.start_chat(
                history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
            )
            response_summarization = chat_session_summarization.send_message(
                f"Generate a detailed summarization of the meeting; provide information on "
                f"the topic of the meeting, the agenda, things discussed and future plans if any are mentioned. "
                f"Provide structured output with only one tag 'summary'. Generate the response in {language}."
            )
            json_data_summarization = json.loads(response_summarization.text)
            summary = json_data_summarization.get('summary', "No summary found.")
            st.session_state.summary_output = summary
            st.session_state.chat_history.append(("Summary", summary))

            # --- Key Decisions ---
            key_decisions_config = {
                "temperature": 0.25,
                "top_p": 0.95,
                "top_k": 40,
                "max_output_tokens": 8192,
                "response_schema": content.Schema(
                    type=content.Type.OBJECT,
                    enum=[],
                    required=["key_decisions"],
                    properties={
                        "key_decisions": content.Schema(
                            type=content.Type.STRING,
                        ),
                    },
                ),
                "response_mime_type": "application/json",
            }
            key_decisions_model = genai.GenerativeModel(
                model_name="gemini-2.0-flash-exp",
                generation_config=key_decisions_config,
                safety_settings={
                    'HATE': 'BLOCK_NONE',
                    'HARASSMENT': 'BLOCK_NONE',
                    'SEXUAL': 'BLOCK_NONE',
                    'DANGEROUS': 'BLOCK_NONE'
                }
            )
            chat_session_key_decisions = key_decisions_model.start_chat(
                history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
            )
            response_key_decisions = chat_session_key_decisions.send_message(
                f"Identify and list the key decisions made during the meeting. "
                f"Generate response in {language}."
            )
            json_data_key_decisions = json.loads(response_key_decisions.text)
            key_decisions = json_data_key_decisions.get('key_decisions', "No key decisions found.")
            st.session_state.key_decisions_output = key_decisions

        except Exception as e:
            st.error(f"Error processing audio: {str(e)}")
        finally:
            # Clean up temp file if created
            if temp_path:
                os.unlink(temp_path)

# Diarization output display
if st.session_state.diarization_output:
    st.subheader("Diarization Output")
    st.text_area("Transcript", st.session_state.diarization_output, height=300)

# Summary output
if st.session_state.summary_output:
    st.subheader("Summary")
    st.write(st.session_state.summary_output)

# Key decisions output
if st.session_state.key_decisions_output:
    st.subheader("Key decisions")
    key_decisions_list = st.session_state.key_decisions_output.strip().split('\n')  # Split by newline
    for decision in key_decisions_list:
        if decision.strip():  # make sure decision is not empty
            st.markdown(f"- {decision.strip()}")

# Generate PDF Report button
if st.button("Generate PDF report"):
    if st.session_state.summary_output and st.session_state.key_decisions_output and st.session_state.diarization_output:
        pdf_bytes = generate_pdf_report(
            meeting_date_time,
            st.session_state.summary_output,
            st.session_state.key_decisions_output,
            st.session_state.diarization_output
        )
        st.download_button(
            label="Download PDF Report",
            data=pdf_bytes,
            file_name="meeting_report.pdf",
            mime="application/pdf"
        )
    else:
        st.warning("Please upload or record audio to generate report.")

# Q&A section
if st.session_state.diarization_output:
    st.subheader("Question Answering")
    question = st.text_input("Type in your question")
    if st.button("Send"):
        if question:
            # Add user question to chat history
            st.session_state.chat_history.append(("User", question))
            with st.spinner("Generating response..."):
                try:
                    # Configure QnA model
                    qna_config = {
                        "temperature": 0.25,
                        "top_p": 0.95,
                        "top_k": 40,
                        "max_output_tokens": 8192,
                        "response_schema": content.Schema(
                            type=content.Type.OBJECT,
                            enum=[],
                            required=["answer"],
                            properties={
                                "answer": content.Schema(
                                    type=content.Type.STRING,
                                ),
                            },
                        ),
                        "response_mime_type": "application/json",
                    }
                    qna_model = genai.GenerativeModel(
                        model_name="gemini-2.0-flash-exp",
                        generation_config=qna_config,
                        safety_settings={
                            'HATE': 'BLOCK_NONE',
                            'HARASSMENT': 'BLOCK_NONE',
                            'SEXUAL': 'BLOCK_NONE',
                            'DANGEROUS': 'BLOCK_NONE'
                        }
                    )

                    # Generate answer
                    chat_session_qna = qna_model.start_chat(
                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
                    )
                    response_qna = chat_session_qna.send_message(
                        f"Answer the following question based on the meeting: {question}. Generate response in {language}. "
                        f"Provide structured output with only one tag 'answer'."
                    )
                    json_data_qna = json.loads(response_qna.text)
                    answer = json_data_qna.get('answer', "No answer found.")

                    # Add bot response to chat history
                    st.session_state.chat_history.append(("Bot", answer))
                    st.rerun()  # Rerun to update the chat display
                except Exception as e:
                    st.error(f"Error generating answer: {str(e)}")

    # Chat history display for Q&A
    for role, message in st.session_state.chat_history:
        if role == "User":
            st.write(f"**Question**: {message}")
        elif role == "Bot":
            st.write(f"**Answer**: {message}")

# Email input and Send Report button
st.subheader("Share Report")
email_address = st.text_input("Email address:")
send_button = st.button("Send Report")

if send_button:
    if not email_address:
        st.warning("Please enter an email address.")
    elif not (st.session_state.summary_output and st.session_state.key_decisions_output and st.session_state.diarization_output):
        st.warning("Please upload or record audio and generate the report first.")
    else:
        success, message = send_email_report(
            email_address,
            meeting_date_time,
            st.session_state.summary_output,
            st.session_state.key_decisions_output,
            st.session_state.diarization_output
        )
        if success:
            st.success(message)
        else:
            st.error(message)
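
# --- Usage note ---
# A minimal sketch of how to run this app, assuming the file is saved as app.py
# (the filename and the example SMTP values are assumptions; the environment
# variable names are the ones read by the code above).
#
#   pip install streamlit streamlit-mic-recorder google-generativeai reportlab
#   export GEMINI_API_KEY="..."            # used by genai.configure()
#   export SMTP_SERVER="smtp.example.com"  # SMTP settings are only needed for "Send Report"
#   export SMTP_PORT="587"
#   export SMTP_USERNAME="user@example.com"
#   export SMTP_PASSWORD="..."
#   streamlit run app.py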