Spaces:

turanhasan
/

BoraAk

Sleeping

App Files Files Community

BoraAk / app.py

turanhasan

Update app.py

1f244c9 verified 5 months ago

raw

history blame

19.9 kB

	import streamlit as st
	import os
	import google.generativeai as genai
	from google.ai.generativelanguage_v1beta.types import content
	import json
	from tempfile import NamedTemporaryFile
	from datetime import datetime
	import io
	from reportlab.pdfgen import canvas
	from reportlab.lib.pagesizes import letter
	from reportlab.lib.units import inch
	import smtplib
	from email.mime.text import MIMEText
	from streamlit_mic_recorder import mic_recorder
	import wave

	# Install streamlit-mic-recorder if not already installed:
	# pip install streamlit-mic-recorder

	# Seed every session-state key the app reads later. Keys that already hold a
	# value from an earlier rerun are left untouched.
	for _state_key, _initial_value in (
	    ('chat_history', []),
	    ('diarization_output', None),
	    ('uploaded_file', None),
	    ('language', "English"),
	    ('num_speakers', 2),
	    ('summary_output', None),
	    ('key_decisions_output', None),
	    ('email_sent_message', ""),
	    ('recorded_audio', None),
	):
	    if _state_key not in st.session_state:
	        st.session_state[_state_key] = _initial_value


	# Configuration for the page
	st.set_page_config(layout="wide", page_title="AI Meeting Notes & Reporting")

	# Function to generate PDF report
	def generate_pdf_report(meeting_date, summary, key_decisions, transcription):
	    """Render the meeting report as a PDF and return the document bytes.

	    The report has a title, the meeting date and up to three sections
	    (summary, key decisions, transcription). Text is wrapped to the printable
	    width and continues on a new page when the bottom margin is reached.

	    Args:
	        meeting_date: Object exposing ``strftime`` (the caller passes the
	            value of the date picker).
	        summary: Summary text; newlines start a new line. ``None`` or an
	            empty string yields an empty summary section.
	        key_decisions: Newline-separated decisions. Blank entries are
	            skipped and each remaining entry is drawn as a ``- `` bullet.
	            The section is omitted when this value is falsy.
	        transcription: Transcript text; the section is omitted when falsy.

	    Returns:
	        bytes: The serialized PDF.
	    """
	    # Function-scope import so this definition does not rely on a change to
	    # the file's import block.
	    from reportlab.lib.utils import simpleSplit

	    left_margin = inch
	    top_of_page = 10.5 * inch
	    bottom_margin = 1 * inch
	    printable_width = letter[0] - 2 * inch

	    buffer = io.BytesIO()
	    p = canvas.Canvas(buffer, pagesize=letter)

	    def draw_heading(text, y):
	        """Draw a bold section heading at ``y`` and return the next baseline."""
	        p.setFont("Helvetica-Bold", 12)
	        p.drawString(left_margin, y, text)
	        return y - 0.3 * inch

	    def draw_lines(lines, y, font_size, leading):
	        """Draw ``lines`` wrapped to the page width; return the next baseline."""
	        p.setFont("Helvetica", font_size)
	        for raw_line in lines:
	            # simpleSplit yields no pieces for blank text; keep one empty
	            # piece so blank separator lines still take up vertical space.
	            # Note that wrapping collapses runs of whitespace inside a line.
	            pieces = simpleSplit(raw_line, "Helvetica", font_size, printable_width) or [""]
	            for piece in pieces:
	                p.drawString(left_margin, y, piece)
	                y -= leading
	                if y < bottom_margin:
	                    p.showPage()
	                    y = top_of_page
	                    # The font has to be selected again on the fresh page.
	                    p.setFont("Helvetica", font_size)
	        return y

	    # NOTE(review): the built-in Helvetica font presumably cannot render every
	    # character of the selectable report languages (e.g. some Turkish
	    # letters) - confirm, and register a TrueType font if needed.
	    p.setFont("Helvetica-Bold", 16)
	    p.drawString(left_margin, 10.5 * inch, "Meeting Report")
	    p.setFont("Helvetica", 12)
	    p.drawString(left_margin, 10 * inch, f"Date: {meeting_date.strftime('%Y-%m-%d')}")

	    y_position = draw_heading("Summary:", 9.5 * inch)
	    y_position = draw_lines((summary or "").split('\n'), y_position, 10, 0.2 * inch)

	    if key_decisions:
	        bullets = [
	            f"- {decision.strip()}"
	            for decision in key_decisions.strip().split('\n')
	            if decision.strip()
	        ]
	        y_position = draw_heading("Key Decisions:", y_position)
	        y_position = draw_lines(bullets, y_position, 10, 0.2 * inch)

	    if transcription:
	        y_position = draw_heading("Transcription:", y_position)
	        # Smaller font and tighter spacing for the (usually long) transcript.
	        y_position = draw_lines(transcription.split('\n'), y_position, 8, 0.15 * inch)

	    p.save()
	    pdf_out = buffer.getvalue()
	    buffer.close()
	    return pdf_out

	def send_email_report(email_address, meeting_date, summary, key_decisions, transcription):
	    """Send the meeting report as a plain-text email.

	    The SMTP connection is configured through the environment variables
	    ``SMTP_SERVER``, ``SMTP_PORT``, ``SMTP_USERNAME`` and ``SMTP_PASSWORD``;
	    the username doubles as the sender address.

	    Args:
	        email_address: Recipient address as typed by the user.
	        meeting_date: Object exposing ``strftime``; used in subject and body.
	        summary: Summary text placed in the body.
	        key_decisions: Key-decisions text placed in the body.
	        transcription: Transcript text placed in the body.

	    Returns:
	        tuple[bool, str]: ``(True, message)`` on success, otherwise
	        ``(False, reason)``. Failures are reported through the return value
	        and never raised.
	    """
	    # Function-scope import so this definition does not rely on a change to
	    # the file's import block.
	    import ssl

	    smtp_server = os.environ.get("SMTP_SERVER")
	    smtp_port = os.environ.get("SMTP_PORT")
	    smtp_username = os.environ.get("SMTP_USERNAME")
	    smtp_password = os.environ.get("SMTP_PASSWORD")
	    sender_email = smtp_username  # For simplicity, the sender is the SMTP username

	    if not all([smtp_server, smtp_port, smtp_username, smtp_password, sender_email]):
	        return False, "SMTP configuration is missing. Please set environment variables: SMTP_SERVER, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD."

	    # Environment values are strings; validate the port up front so a typo
	    # produces a clear message instead of an obscure connection error.
	    try:
	        port_number = int(smtp_port)
	    except ValueError:
	        return False, f"SMTP configuration is invalid: SMTP_PORT must be an integer, got {smtp_port!r}."

	    # The recipient is untrusted user input that ends up in a mail header:
	    # refuse line breaks so no extra headers can be smuggled in.
	    recipient = (email_address or "").strip()
	    if not recipient or "\r" in recipient or "\n" in recipient:
	        return False, "Email sending failed: invalid recipient address."

	    meeting_day = meeting_date.strftime('%Y-%m-%d')
	    subject = f"Meeting Report - {meeting_day}"
	    body = f"Meeting Date: {meeting_day}\n\nSummary:\n{summary}\n\nKey Decisions:\n{key_decisions}\n\nTranscription:\n{transcription}"

	    msg = MIMEText(body)
	    msg['Subject'] = subject
	    msg['From'] = sender_email
	    msg['To'] = recipient

	    try:
	        # The timeout keeps an unreachable server from blocking the app
	        # indefinitely; the default SSL context verifies the server
	        # certificate before credentials are sent.
	        with smtplib.SMTP(smtp_server, port_number, timeout=30) as server:
	            server.starttls(context=ssl.create_default_context())
	            server.login(smtp_username, smtp_password)
	            server.sendmail(sender_email, [recipient], msg.as_string())
	        return True, "Email sent successfully!"
	    except Exception as e:
	        # Boundary handler: callers expect a (success, message) tuple.
	        return False, f"Email sending failed: {e}"


	# Main UI: title plus the input widgets (date, speaker count, report
	# language, file upload and microphone recorder).
	st.title("AI Meeting Notes & Reporting")

	# Meeting Date & Time
	meeting_date_time = st.date_input("Meeting Date & Time", datetime.today())

	# Number of speakers (remembered across reruns through session state)
	num_speakers = st.number_input("Number of speakers", min_value=1, max_value=10, value=st.session_state.num_speakers)
	st.session_state.num_speakers = num_speakers  # Update session state

	# Language selection; the option list is defined once and reused for the
	# default-index lookup.
	REPORT_LANGUAGES = ["English", "Turkish", "Spanish", "French", "German"]
	language = st.selectbox(
	    "Language of report",
	    REPORT_LANGUAGES,
	    index=REPORT_LANGUAGES.index(st.session_state.language) if st.session_state.language in REPORT_LANGUAGES else 0,
	)
	st.session_state.language = language  # Update session state

	# File upload
	uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])

	# Voice recording. The recording is previewed below as "audio/wav", so WAV
	# output is requested explicitly instead of relying on the recorder's
	# default output format.
	audio_bytes = mic_recorder(start_prompt="Record", stop_prompt="Stop recording", format="wav", key='recorder')

	if audio_bytes:
	    # The recorder result is handled as a dict carrying the audio under
	    # 'bytes'; anything else is treated as the raw bytes themselves.
	    if isinstance(audio_bytes, dict) and "bytes" in audio_bytes:
	        recorded_bytes = audio_bytes["bytes"]
	    else:
	        recorded_bytes = audio_bytes
	    st.audio(recorded_bytes, format="audio/wav")
	    st.session_state.recorded_audio = recorded_bytes

	# Diarization, Summarization and Key Decisions logic - Automatically after upload or record
	#
	# Decide whether this rerun has NEW audio to process. Each source is compared
	# with the last item of that source that was already processed, so ordinary
	# reruns (button clicks, typing a question) do not start the pipeline again.
	process_audio = False
	audio_source_indicator = ""

	if uploaded_file and uploaded_file != st.session_state.get('last_processed_upload'):
	    # A dedicated marker is used because `uploaded_file` in session state is
	    # reset by the recording branch below; comparing against that key would
	    # reprocess a still-attached upload right after a recording.
	    st.session_state.last_processed_upload = uploaded_file
	    st.session_state.uploaded_file = uploaded_file  # Selects the upload as the active source
	    st.session_state.recorded_audio = None  # Reset recorded audio
	    process_audio = True
	    audio_source_indicator = f"Processing uploaded file: {uploaded_file.name}"
	elif st.session_state.recorded_audio and st.session_state.recorded_audio != st.session_state.get('last_recorded_audio_hash'):
	    # Despite its name, this key stores the recorded bytes themselves; they
	    # are compared directly to detect a new recording.
	    st.session_state.last_recorded_audio_hash = st.session_state.recorded_audio
	    st.session_state.uploaded_file = None  # Selects the recording as the active source
	    process_audio = True
	    audio_source_indicator = "Processing recorded audio"

	def _build_json_model(field_name=None, temperature=0.25):
	    """Create a Gemini model configured to answer in JSON.

	    When ``field_name`` is given, the response schema is an object with that
	    single required string field; otherwise no schema is imposed.
	    """
	    generation_config = {
	        "temperature": temperature,
	        "top_p": 0.95,
	        "top_k": 40,
	        "max_output_tokens": 8192,
	        "response_mime_type": "application/json",
	    }
	    if field_name is not None:
	        generation_config["response_schema"] = content.Schema(
	            type=content.Type.OBJECT,
	            enum=[],
	            required=[field_name],
	            properties={
	                field_name: content.Schema(
	                    type=content.Type.STRING,
	                ),
	            },
	        )
	    return genai.GenerativeModel(
	        model_name="gemini-2.0-flash-exp",
	        generation_config=generation_config,
	        safety_settings={
	            'HATE': 'BLOCK_NONE',
	            'HARASSMENT': 'BLOCK_NONE',
	            'SEXUAL': 'BLOCK_NONE',
	            'DANGEROUS': 'BLOCK_NONE'
	        }
	    )


	def _ask_model(model, context_part, prompt):
	    """Send ``prompt`` in a chat seeded with ``context_part``; return the parsed JSON reply."""
	    chat_session = model.start_chat(
	        history=[{"role": "user", "parts": [context_part]}]
	    )
	    return json.loads(chat_session.send_message(prompt).text)


	if process_audio:
	    # New audio: discard everything derived from the previous one.
	    st.session_state.diarization_output = None
	    st.session_state.summary_output = None
	    st.session_state.key_decisions_output = None
	    st.session_state.chat_history = []  # Clear chat history for new file
	    st.session_state.email_sent_message = ""  # Clear email sent message

	    with st.spinner(f"Processing audio and generating summary and key decisions... {audio_source_indicator}"):
	        temp_path = None
	        gemini_file = None
	        try:
	            # Configure Gemini; fail with a readable message when the key is absent.
	            api_key = os.environ.get("GEMINI_API_KEY")
	            if not api_key:
	                raise RuntimeError("GEMINI_API_KEY environment variable is not set.")
	            genai.configure(api_key=api_key)

	            # Pick the active source and the matching file suffix / MIME type.
	            if st.session_state.uploaded_file:
	                # The uploader accepts mp3 and wav; label WAV uploads as WAV
	                # instead of sending everything as MP3.
	                extension = os.path.splitext(st.session_state.uploaded_file.name)[1].lower()
	                if extension == '.wav':
	                    suffix, mime_type = '.wav', "audio/wav"
	                else:
	                    suffix, mime_type = '.mp3', "audio/mpeg"
	                audio_payload = st.session_state.uploaded_file.getvalue()
	            elif st.session_state.recorded_audio:
	                # The recording is handled as WAV throughout the app.
	                suffix, mime_type = '.wav', "audio/wav"
	                audio_payload = st.session_state.recorded_audio
	            else:
	                raise ValueError("No audio available to process.")

	            # Write the audio to a temporary file; the upload happens after
	            # the file is closed so its content is fully flushed to disk.
	            with NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
	                temp_path = tmp_file.name
	                tmp_file.write(audio_payload)
	            gemini_file = genai.upload_file(temp_path, mime_type=mime_type)

	            # --- Diarization ---
	            json_data_diarization = _ask_model(
	                _build_json_model(temperature=0.5),
	                gemini_file,
	                f"Generate meeting diarization of the meeting audio record provided in the file. "
	                f"The meeting may be in a foreign language, expect a mixture of words in local language "
	                f"and words in english. Provided audio has {num_speakers} speakers. "
	                f"Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
	                f"Provide a structured JSON output. timestamp (hh:mm:ss), speaker (name only), "
	                f"speech (transcription). Do not transcribe filler words."
	            )
	            # The reply is consumed as a sequence of objects with the keys
	            # 'timestamp', 'speaker' and 'speech'.
	            st.session_state.diarization_output = "".join(
	                f"{item['timestamp']} - {item['speaker']}: {item['speech']}\n\n"
	                for item in json_data_diarization
	            )

	            # --- Summarization ---
	            json_data_summarization = _ask_model(
	                _build_json_model("summary"),
	                st.session_state.diarization_output,
	                f"Generate a detailed summarization of the meeting, provide information on "
	                f"the topic of the meeting, agenda, things discussed and future plans if any mentioned. "
	                f"Provide structured output with only one tag 'summary'. Generate response in {language}."
	            )
	            summary = json_data_summarization.get('summary', "No summary found.")
	            st.session_state.summary_output = summary
	            st.session_state.chat_history.append(("Summary", summary))

	            # --- Key Decisions ---
	            json_data_key_decisions = _ask_model(
	                _build_json_model("key_decisions"),
	                st.session_state.diarization_output,
	                f"Identify and list the key decisions made during the meeting. "
	                f"Generate response in {language}."
	            )
	            key_decisions = json_data_key_decisions.get('key_decisions', "No key decisions found.")
	            st.session_state.key_decisions_output = key_decisions

	        except Exception as e:
	            # Top-level boundary of the pipeline: show the failure in the UI.
	            st.error(f"Error processing audio: {str(e)}")
	        finally:
	            # Clean up temp file if created
	            if temp_path:
	                os.unlink(temp_path)
	            # Best-effort removal of the remote copy of the audio; a failure
	            # here must not hide the pipeline result or its error message.
	            if gemini_file is not None:
	                try:
	                    genai.delete_file(gemini_file.name)
	                except Exception:
	                    pass

	# Speaker-labelled transcript, shown once it is available
	transcript_text = st.session_state.diarization_output
	if transcript_text:
	    st.subheader("Diarization Output")
	    st.text_area("Transcript", transcript_text, height=300)

	# Meeting summary
	summary_text = st.session_state.summary_output
	if summary_text:
	    st.subheader("Summary")
	    st.write(summary_text)

	# Key decisions: one markdown bullet per non-blank line
	decisions_text = st.session_state.key_decisions_output
	if decisions_text:
	    st.subheader("Key decisions")
	    for entry in (part.strip() for part in decisions_text.strip().split('\n')):
	        if entry:
	            st.markdown(f"- {entry}")


	# "Generate PDF report": build the PDF from the stored outputs and offer it
	# for download; warn when any of the three outputs is still missing.
	if st.button("Generate PDF report"):
	    report_parts = (
	        st.session_state.summary_output,
	        st.session_state.key_decisions_output,
	        st.session_state.diarization_output,
	    )
	    if not all(report_parts):
	        st.warning("Please upload or record audio to generate report.")
	    else:
	        report_summary, report_decisions, report_transcript = report_parts
	        st.download_button(
	            label="Download PDF Report",
	            data=generate_pdf_report(
	                meeting_date_time,
	                report_summary,
	                report_decisions,
	                report_transcript,
	            ),
	            file_name="meeting_report.pdf",
	            mime="application/pdf",
	        )

	# Q&A section: available once a transcript exists. Each question is answered
	# by the model from the transcript; question and answer are stored together
	# in the chat history and rendered below the input.
	if st.session_state.diarization_output:
	    st.subheader("Question Answering")
	    question = st.text_input("Type in your question")
	    if st.button("Send"):
	        if not question.strip():
	            # Give feedback instead of silently ignoring an empty question.
	            st.warning("Please type a question first.")
	        else:
	            with st.spinner("Generating response..."):
	                try:
	                    # Configure QnA model: JSON object with one required
	                    # string field named 'answer'.
	                    qna_config = {
	                        "temperature": 0.25,
	                        "top_p": 0.95,
	                        "top_k": 40,
	                        "max_output_tokens": 8192,
	                        "response_schema": content.Schema(
	                            type=content.Type.OBJECT,
	                            enum=[],
	                            required=["answer"],
	                            properties={
	                                "answer": content.Schema(
	                                    type=content.Type.STRING,
	                                ),
	                            },
	                        ),
	                        "response_mime_type": "application/json",
	                    }

	                    qna_model = genai.GenerativeModel(
	                        model_name="gemini-2.0-flash-exp",
	                        generation_config=qna_config,
	                        safety_settings={
	                            'HATE': 'BLOCK_NONE',
	                            'HARASSMENT': 'BLOCK_NONE',
	                            'SEXUAL': 'BLOCK_NONE',
	                            'DANGEROUS': 'BLOCK_NONE'
	                        }
	                    )

	                    # Generate answer from the stored transcript
	                    chat_session_qna = qna_model.start_chat(
	                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
	                    )

	                    # The trailing space after the language sentence keeps
	                    # the two prompt fragments from running together.
	                    response_qna = chat_session_qna.send_message(
	                        f"Answer the following question based on the meeting: {question}. Generate response in {language}. "
	                        f"Provide structured output with only one tag 'answer'."
	                    )

	                    json_data_qna = json.loads(response_qna.text)
	                    answer = json_data_qna.get('answer', "No answer found.")
	                except Exception as e:
	                    st.error(f"Error generating answer: {str(e)}")
	                else:
	                    # Record question and answer together so a failed request
	                    # does not leave an unanswered question in the history.
	                    st.session_state.chat_history.extend([("User", question), ("Bot", answer)])
	                    st.rerun()  # Rerun to update the chat display

	    # Chat history display for Q&A (entries with other roles are not shown)
	    for role, message in st.session_state.chat_history:
	        if role == "User":
	            st.write(f"Question: {message}")
	        elif role == "Bot":
	            st.write(f"Answer: {message}")

	# Share section: recipient input plus a button that mails the report
	st.subheader("Share Report")
	email_address = st.text_input("Email address:")

	send_button = st.button("Send Report")
	if send_button:
	    # The report can only be sent once all three outputs exist.
	    report_ready = bool(
	        st.session_state.summary_output
	        and st.session_state.key_decisions_output
	        and st.session_state.diarization_output
	    )
	    if not email_address:
	        st.warning("Please enter an email address.")
	    elif not report_ready:
	        st.warning("Please upload or record audio and generate report first.")
	    else:
	        success, message = send_email_report(
	            email_address,
	            meeting_date_time,
	            st.session_state.summary_output,
	            st.session_state.key_decisions_output,
	            st.session_state.diarization_output,
	        )
	        # Show the outcome with the matching status widget.
	        show_status = st.success if success else st.error
	        show_status(message)