Spaces:

turanhasan
/

BoraAk

Sleeping

App Files Files Community

turanhasan committed 23 days ago

Commit

1f244c9

verified ·

1 Parent(s): e82dfa6

Update app.py

Browse files

Files changed (1) hide show

app.py +473 -98

app.py CHANGED Viewed

@@ -1,108 +1,483 @@
 import streamlit as st
-import requests
-from transformers import pipeline
 from reportlab.pdfgen import canvas
 import smtplib
-from email.message import EmailMessage
-import os
-# Google Speech-to-Text API Anahtarı
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "your-google-cloud-key.json"
-# Gemini API Anahtarı
-GEMINI_API_KEY = "your-gemini-api-key"
-# Hugging Face Özetleme Modeli
-summary_pipeline = pipeline("summarization", model="facebook/bart-large-cnn")
-# Streamlit Sayfası Başlığı
-st.set_page_config(page_title="AI-Powered Meeting Notes & Reporting", layout="wide")
-st.title("📢 AI Meeting Notes & Reporting System")
-### 📌 1️⃣ Speech-to-Text (Konuşmayı Metne Çevirme) ###
-def speech_to_text(audio_path):
-    recognizer = sr.Recognizer()
-    with sr.AudioFile(audio_path) as source:
-        audio_data = recognizer.record(source)
         try:
-            text = recognizer.recognize_google(audio_data, language="tr-TR")
-            return text
-        except sr.UnknownValueError:
-            return "❌ Ses anlaşılamadı."
-        except sr.RequestError:
-            return "❌ Google API'ye erişilemiyor."
-uploaded_audio = st.file_uploader("🔊 Ses dosyanızı yükleyin (WAV formatında)", type=["wav"])
-if uploaded_audio:
-    with open("temp.wav", "wb") as f:
-        f.write(uploaded_audio.getbuffer())
-    st.audio(uploaded_audio, format="audio/wav")
-    transcript = speech_to_text("temp.wav")
-    st.subheader("🎤 Metne Dönüştürülen Konuşma:")
-    st.write(transcript)
-### 📌 2️⃣ Metni Özetleme (Gemini API) ###
-def summarize_text(text):
-    url = "https://generativelanguage.googleapis.com/v1/models/gemini-pro:generateText"
-    headers = {"Authorization": f"Bearer {GEMINI_API_KEY}"}
-    payload = {"prompt": f"Summarize the following meeting transcript:\n{text}", "max_tokens": 500}
-    response = requests.post(url, json=payload, headers=headers)
-    return response.json()["choices"][0]["text"]
-if st.button("🔍 Toplantıyı Özetle"):
-    if transcript:
-        summary = summarize_text(transcript)
-        st.subheader("📌 Toplantı Özeti:")
-        st.write(summary)
-### 📌 3️⃣ Kararları Çıkarma (Hugging Face) ###
-def extract_decisions(text):
-    summary = summary_pipeline(text, max_length=100, min_length=30, do_sample=False)
-    return summary[0]['summary_text']
-if st.button("✅ Alınan Kararları Çıkar"):
-    if transcript:
-        decisions = extract_decisions(transcript)
-        st.subheader("📋 Alınan Kararlar:")
-        st.write(decisions)
-### 📌 4️⃣ PDF Raporu Oluşturma ###
-def create_pdf(text, filename="meeting_report.pdf"):
-    c = canvas.Canvas(filename)
-    c.drawString(100, 750, "Meeting Report")
-    c.drawString(100, 730, text)
-    c.save()
-    return filename
-if st.button("📄 PDF Raporu Oluştur"):
-    if transcript:
-        pdf_file = create_pdf(transcript)
-        st.success("📄 Rapor oluşturuldu! Aşağıdan indirebilirsiniz.")
-        with open(pdf_file, "rb") as f:
-            st.download_button("📥 Raporu İndir", f, file_name="meeting_report.pdf", mime="application/pdf")
-### 📌 5️⃣ E-Posta Gönderme ###
-def send_email(pdf_file, recipient_email):
-    email = "[email protected]"
-    password = "your-email-password"
-    msg = EmailMessage()
-    msg['Subject'] = 'Meeting Report'
-    msg['From'] = email
-    msg['To'] = recipient_email
-    msg.set_content('The meeting report is attached.')
-    with open(pdf_file, 'rb') as f:
-        file_data = f.read()
-        msg.add_attachment(file_data, maintype='application', subtype='pdf', filename=pdf_file)
-    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as server:
-        server.login(email, password)
-        server.send_message(msg)
-email_address = st.text_input("📧 E-Posta Adresi:")
-if st.button("📤 Raporu Gönder"):
-    if email_address:
-        send_email("meeting_report.pdf", email_address)
-        st.success("📤 E-posta başarıyla gönderildi!")

 import streamlit as st
+import os
+import google.generativeai as genai
+from google.ai.generativelanguage_v1beta.types import content
+import json
+from tempfile import NamedTemporaryFile
+from datetime import datetime
+import io
 from reportlab.pdfgen import canvas
+from reportlab.lib.pagesizes import letter
+from reportlab.lib.units import inch
 import smtplib
+from email.mime.text import MIMEText
+from streamlit_mic_recorder import mic_recorder
+import wave
+# Install streamlit-mic-recorder if not already installed:
+# pip install streamlit-mic-recorder
+# Initialize session state for chat history if it doesn't exist
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+if 'diarization_output' not in st.session_state:
+    st.session_state.diarization_output = None
+if 'uploaded_file' not in st.session_state:
+    st.session_state.uploaded_file = None
+if 'language' not in st.session_state:
+    st.session_state.language = "English"
+if 'num_speakers' not in st.session_state:
+    st.session_state.num_speakers = 2
+if 'summary_output' not in st.session_state:
+    st.session_state.summary_output = None
+if 'key_decisions_output' not in st.session_state:
+    st.session_state.key_decisions_output = None
+if 'email_sent_message' not in st.session_state:
+    st.session_state.email_sent_message = ""
+if 'recorded_audio' not in st.session_state:
+    st.session_state.recorded_audio = None
+# Configuration for the page
+st.set_page_config(
+    page_title="AI Meeting Notes & Reporting",
+    layout="wide"
+)
+# Function to generate PDF report
+def generate_pdf_report(meeting_date, summary, key_decisions, transcription):
+    buffer = io.BytesIO()
+    p = canvas.Canvas(buffer, pagesize=letter)
+    p.setFont("Helvetica-Bold", 16)
+    p.drawString(inch, 10.5*inch, "Meeting Report")
+    p.setFont("Helvetica", 12)
+    p.drawString(inch, 10*inch, f"Date: {meeting_date.strftime('%Y-%m-%d')}")
+    y_position = 9.5*inch
+    p.setFont("Helvetica-Bold", 12)
+    p.drawString(inch, y_position, "Summary:")
+    y_position -= 0.3*inch
+    p.setFont("Helvetica", 10)
+    summary_lines = summary.split('\n')
+    for line in summary_lines:
+        p.drawString(inch, y_position, line)
+        y_position -= 0.2*inch
+        if y_position < 1*inch: # Simple page break to avoid content overflow - improve if needed
+            p.showPage()
+            y_position = 10.5*inch
+            p.setFont("Helvetica", 10)
+    if key_decisions:
+        p.setFont("Helvetica-Bold", 12)
+        p.drawString(inch, y_position, "Key Decisions:")
+        y_position -= 0.3*inch
+        p.setFont("Helvetica", 10)
+        key_decisions_list = key_decisions.strip().split('\n')
+        for decision in key_decisions_list:
+            if decision.strip():
+                p.drawString(inch, y_position, f"- {decision.strip()}")
+                y_position -= 0.2*inch
+                if y_position < 1*inch: # Simple page break
+                    p.showPage()
+                    y_position = 10.5*inch
+                    p.setFont("Helvetica", 10)
+    if transcription:
+        p.setFont("Helvetica-Bold", 12)
+        p.drawString(inch, y_position, "Transcription:")
+        y_position -= 0.3*inch
+        p.setFont("Helvetica", 8) # Smaller font for transcription
+        transcription_lines = transcription.split('\n')
+        for line in transcription_lines:
+            p.drawString(inch, y_position, line)
+            y_position -= 0.15*inch # Reduced line spacing for transcription
+            if y_position < 1*inch: # Simple page break
+                p.showPage()
+                y_position = 10.5*inch
+                p.setFont("Helvetica", 8)
+    p.save()
+    pdf_out = buffer.getvalue()
+    buffer.close()
+    return pdf_out
+def send_email_report(email_address, meeting_date, summary, key_decisions, transcription):
+    smtp_server = os.environ.get("SMTP_SERVER")
+    smtp_port = os.environ.get("SMTP_PORT")
+    smtp_username = os.environ.get("SMTP_USERNAME")
+    smtp_password = os.environ.get("SMTP_PASSWORD")
+    sender_email = smtp_username # For simplicity, assuming sender is the same as username
+    if not all([smtp_server, smtp_port, smtp_username, smtp_password, sender_email]):
+        return False, "SMTP configuration is missing. Please set environment variables: SMTP_SERVER, SMTP_PORT, SMTP_USERNAME, SMTP_PASSWORD."
+    subject = f"Meeting Report - {meeting_date.strftime('%Y-%m-%d')}"
+    body = f"Meeting Date: {meeting_date.strftime('%Y-%m-%d')}\n\nSummary:\n{summary}\n\nKey Decisions:\n{key_decisions}\n\nTranscription:\n{transcription}"
+    msg = MIMEText(body)
+    msg['Subject'] = subject
+    msg['From'] = sender_email
+    msg['To'] = email_address
+    try:
+        with smtplib.SMTP(smtp_server, smtp_port) as server:
+            server.starttls()
+            server.login(smtp_username, smtp_password)
+            server.sendmail(sender_email, email_address, msg.as_string())
+        return True, "Email sent successfully!"
+    except Exception as e:
+        return False, f"Email sending failed: {e}"
+# Main UI
+st.title("AI Meeting Notes & Reporting")
+# Meeting Date & Time
+meeting_date_time = st.date_input("Meeting Date & Time", datetime.today())
+# Number of speakers
+num_speakers = st.number_input("Number of speakers", min_value=1, max_value=10, value=st.session_state.num_speakers)
+st.session_state.num_speakers = num_speakers # Update session state
+# Language selection
+language = st.selectbox(
+    "Language of report",
+    ["English", "Turkish", "Spanish", "French", "German"],
+    index=["English", "Turkish", "Spanish", "French", "German"].index(st.session_state.language) if st.session_state.language in ["English", "Turkish", "Spanish", "French", "German"] else 0
+)
+st.session_state.language = language # Update session state
+# File upload
+uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])
+# Voice recording
+audio_bytes = mic_recorder(start_prompt="Record", stop_prompt="Stop recording", key='recorder')
+if audio_bytes:
+    if isinstance(audio_bytes, dict) and "bytes" in audio_bytes: # Check if audio_bytes is a dict and has 'bytes' key
+        st.audio(audio_bytes["bytes"], format="audio/wav")
+        st.session_state.recorded_audio = audio_bytes["bytes"]
+    else: # If not a dict or doesn't have 'bytes' key, assume it's raw bytes (fallback, might need adjustment)
+        st.audio(audio_bytes, format="audio/wav")
+        st.session_state.recorded_audio = audio_bytes
+# Diarization, Summarization and Key Decisions logic - Automatically after upload or record
+process_audio = False
+audio_source_indicator = ""
+if uploaded_file and uploaded_file != st.session_state.uploaded_file: # Check if a new file is uploaded
+    st.session_state.uploaded_file = uploaded_file # Update session state
+    st.session_state.recorded_audio = None # Reset recorded audio
+    process_audio = True
+    audio_source_indicator = f"Processing uploaded file: {uploaded_file.name}"
+elif st.session_state.recorded_audio and st.session_state.recorded_audio != getattr(st.session_state.get('last_recorded_audio_hash'), 'value', None): # Check if new recording
+    st.session_state.last_recorded_audio_hash = st.session_state.recorded_audio # Store hash to detect new recordings
+    st.session_state.uploaded_file = None # Reset uploaded file
+    process_audio = True
+    audio_source_indicator = "Processing recorded audio"
+if process_audio:
+    st.session_state.diarization_output = None # Reset previous diarization output
+    st.session_state.summary_output = None # Reset previous summary output
+    st.session_state.key_decisions_output = None # Reset previous key decisions output
+    st.session_state.chat_history = [] # Clear chat history for new file
+    st.session_state.email_sent_message = "" # Clear email sent message
+    with st.spinner(f"Processing audio and generating summary and key decisions... {audio_source_indicator}"):
+        temp_path = None
         try:
+            # Configure Gemini
+            genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+            if st.session_state.uploaded_file: # Process uploaded file
+                # Save uploaded file temporarily
+                with NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file: # Assuming mp3 for wider compatibility, could adjust based on uploaded file type
+                    tmp_file.write(st.session_state.uploaded_file.getvalue())
+                    temp_path = tmp_file.name
+                    mime_type = "audio/mpeg" # Assuming mp3, adjust if needed based on file type
+                    gemini_file = genai.upload_file(temp_path, mime_type=mime_type)
+            elif st.session_state.recorded_audio: # Process recorded audio
+                # Save recorded audio temporarily (WAV from mic_recorder) and convert to MP3 if needed for Gemini
+                with NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file_wav:
+                    tmp_file_wav.write(st.session_state.recorded_audio)
+                    temp_path = tmp_file_wav.name
+                    gemini_file = genai.upload_file(temp_path, mime_type="audio/wav") # Assuming WAV is directly compatible
+            # --- Diarization ---
+            diarization_config = {
+                "temperature": 0.5,
+                "top_p": 0.95, #0.95
+                "top_k": 40,
+                "max_output_tokens": 8192,
+                "response_mime_type": "application/json",
+            }
+            diarization_model = genai.GenerativeModel(
+                model_name="gemini-2.0-flash-exp",
+                generation_config=diarization_config,
+                safety_settings={
+                    'HATE': 'BLOCK_NONE',
+                    'HARASSMENT': 'BLOCK_NONE',
+                    'SEXUAL': 'BLOCK_NONE',
+                    'DANGEROUS': 'BLOCK_NONE'
+                }
+            )
+            chat_session_diarization = diarization_model.start_chat(
+                history=[{"role": "user", "parts": [gemini_file]}]
+            )
+            response_diarization = chat_session_diarization.send_message(
+                f"Generate meeting diarization of the meeting audio record provided in the file. "
+                f"The meeting may be in a foreign language, expect a mixture of words in local language "
+                f"and words in english. Provided audio has {num_speakers} speakers. "
+                f"Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
+                f"Provide a structured JSON output. timestamp (hh:mm:ss), speaker (name only), "
+                f"speech (transcription). Do not transcribe filler words."
+            )
+            json_data_diarization = json.loads(response_diarization.text)
+            formatted_output = ""
+            for item in json_data_diarization:
+                formatted_output += f"{item['timestamp']} - {item['speaker']}: {item['speech']}\n\n"
+            st.session_state.diarization_output = formatted_output
+            # --- Summarization ---
+            summarization_config = {
+                "temperature": 0.25,
+                "top_p": 0.95,
+                "top_k": 40,
+                "max_output_tokens": 8192,
+                "response_schema": content.Schema(
+                    type=content.Type.OBJECT,
+                    enum=[],
+                    required=["summary"],
+                    properties={
+                        "summary": content.Schema(
+                            type=content.Type.STRING,
+                        ),
+                    },
+                ),
+                "response_mime_type": "application/json",
+            }
+            summarization_model = genai.GenerativeModel(
+                model_name="gemini-2.0-flash-exp",
+                generation_config=summarization_config,
+                safety_settings={
+                    'HATE': 'BLOCK_NONE',
+                    'HARASSMENT': 'BLOCK_NONE',
+                    'SEXUAL': 'BLOCK_NONE',
+                    'DANGEROUS': 'BLOCK_NONE'
+                }
+            )
+            chat_session_summarization = summarization_model.start_chat(
+                history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
+            )
+            response_summarization = chat_session_summarization.send_message(
+                f"Generate a detailed summarization of the meeting, provide information on "
+                f"the topic of the meeting, agenda, things discussed and future plans if any mentioned. "
+                f"Provide structured output with only one tag 'summary'. Generate response in {language}."
+            )
+            json_data_summarization = json.loads(response_summarization.text)
+            summary = json_data_summarization.get('summary', "No summary found.")
+            st.session_state.summary_output = summary
+            st.session_state.chat_history.append(("Summary", summary))
+            # --- Key Decisions ---
+            key_decisions_config = {
+                "temperature": 0.25,
+                "top_p": 0.95,
+                "top_k": 40,
+                "max_output_tokens": 8192,
+                "response_schema": content.Schema(
+                    type=content.Type.OBJECT,
+                    enum=[],
+                    required=["key_decisions"],
+                    properties={
+                        "key_decisions": content.Schema(
+                            type=content.Type.STRING,
+                        ),
+                    },
+                ),
+                "response_mime_type": "application/json",
+            }
+            key_decisions_model = genai.GenerativeModel(
+                model_name="gemini-2.0-flash-exp",
+                generation_config=key_decisions_config,
+                safety_settings={
+                    'HATE': 'BLOCK_NONE',
+                    'HARASSMENT': 'BLOCK_NONE',
+                    'SEXUAL': 'BLOCK_NONE',
+                    'DANGEROUS': 'BLOCK_NONE'
+                }
+            )
+            chat_session_key_decisions = key_decisions_model.start_chat(
+                history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
+            )
+            response_key_decisions = chat_session_key_decisions.send_message(
+                f"Identify and list the key decisions made during the meeting. "
+                f"Generate response in {language}."
+            )
+            json_data_key_decisions = json.loads(response_key_decisions.text)
+            key_decisions = json_data_key_decisions.get('key_decisions', "No key decisions found.")
+            st.session_state.key_decisions_output = key_decisions
+        except Exception as e:
+            st.error(f"Error processing audio: {str(e)}")
+        finally:
+            # Clean up temp file if created
+            if temp_path:
+                os.unlink(temp_path)
+# Diarization output display
+if st.session_state.diarization_output:
+    st.subheader("Diarization Output")
+    st.text_area("Transcript", st.session_state.diarization_output, height=300)
+# Summary output
+if st.session_state.summary_output:
+    st.subheader("Summary")
+    st.write(st.session_state.summary_output)
+# Key decisions output
+if st.session_state.key_decisions_output:
+    st.subheader("Key decisions")
+    key_decisions_list = st.session_state.key_decisions_output.strip().split('\n') # Split by newline
+    for decision in key_decisions_list:
+        if decision.strip(): # make sure decision is not empty
+            st.markdown(f"- {decision.strip()}")
+# Generate PDF Report button
+if st.button("Generate PDF report"):
+    if st.session_state.summary_output and st.session_state.key_decisions_output and st.session_state.diarization_output:
+        pdf_bytes = generate_pdf_report(
+            meeting_date_time,
+            st.session_state.summary_output,
+            st.session_state.key_decisions_output,
+            st.session_state.diarization_output
+        )
+        st.download_button(
+            label="Download PDF Report",
+            data=pdf_bytes,
+            file_name="meeting_report.pdf",
+            mime="application/pdf"
+        )
+    else:
+        st.warning("Please upload or record audio to generate report.")
+# Q&A section
+if st.session_state.diarization_output:
+    st.subheader("Question Answering")
+    question = st.text_input("Type in your question")
+    if st.button("Send"):
+        if question:
+            # Add user question to chat history
+            st.session_state.chat_history.append(("User", question))
+            with st.spinner("Generating response..."):
+                try:
+                    # Configure QnA model
+                    qna_config = {
+                        "temperature": 0.25,
+                        "top_p": 0.95,
+                        "top_k": 40,
+                        "max_output_tokens": 8192,
+                        "response_schema": content.Schema(
+                            type=content.Type.OBJECT,
+                            enum=[],
+                            required=["answer"],
+                            properties={
+                                "answer": content.Schema(
+                                    type=content.Type.STRING,
+                                ),
+                            },
+                        ),
+                        "response_mime_type": "application/json",
+                    }
+                    qna_model = genai.GenerativeModel(
+                        model_name="gemini-2.0-flash-exp",
+                        generation_config=qna_config,
+                        safety_settings={
+                            'HATE': 'BLOCK_NONE',
+                            'HARASSMENT': 'BLOCK_NONE',
+                            'SEXUAL': 'BLOCK_NONE',
+                            'DANGEROUS': 'BLOCK_NONE'
+                        }
+                    )
+                    # Generate answer
+                    chat_session_qna = qna_model.start_chat(
+                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
+                    )
+                    response_qna = chat_session_qna.send_message(
+                        f"Answer the following question based on the meeting: {question}. Generate response in {language}."
+                        f"Provide structured output with only one tag 'answer'."
+                    )
+                    json_data_qna = json.loads(response_qna.text)
+                    answer = json_data_qna.get('answer', "No answer found.")
+                    # Add bot response to chat history
+                    st.session_state.chat_history.append(("Bot", answer))
+                    st.rerun() # Rerun to update the chat display
+                except Exception as e:
+                    st.error(f"Error generating answer: {str(e)}")
+# Chat history display for Q&A
+for role, message in st.session_state.chat_history:
+    if role == "User":
+        st.write(f"**Question**: {message}")
+    elif role == "Bot":
+        st.write(f"**Answer**: {message}")
+# Email input and Send Report button
+st.subheader("Share Report")
+email_address = st.text_input("Email address:")
+send_button = st.button("Send Report")
+if send_button:
+    if not email_address:
+        st.warning("Please enter an email address.")
+    elif not (st.session_state.summary_output and st.session_state.key_decisions_output and st.session_state.diarization_output):
+        st.warning("Please upload or record audio and generate report first.")
+    else:
+        success, message = send_email_report(
+            email_address,
+            meeting_date_time,
+            st.session_state.summary_output,
+            st.session_state.key_decisions_output,
+            st.session_state.diarization_output
+        )
+        if success:
+            st.success(message)
+        else:
+            st.error(message)