Spaces:

turanhasan
/

BoraAk

Sleeping

App Files Files Community

turanhasan committed 26 days ago

Commit

8e34cf7

verified ·

1 Parent(s): 533ff7a

Create app.py

Browse files

Files changed (1) hide show

app.py +234 -0

app.py ADDED Viewed

	@@ -0,0 +1,234 @@

+import streamlit as st
+import os
+import google.generativeai as genai
+from google.ai.generativelanguage_v1beta.types import content
+import json
+from tempfile import NamedTemporaryFile
+# Initialize session state for chat history if it doesn't exist
+if 'chat_history' not in st.session_state:
+    st.session_state.chat_history = []
+if 'diarization_output' not in st.session_state:
+    st.session_state.diarization_output = None
+# Configuration for the page
+st.set_page_config(
+    page_title="Meeting Audio Analyzer",
+    layout="wide"
+)
+# Sidebar controls
+with st.sidebar:
+    st.title("Settings")
+    # Language selection
+    language = st.selectbox(
+        "Select language",
+        ["English", "Turkish", "Spanish", "French", "German"]
+    )
+    # Number of speakers
+    num_speakers = st.number_input(
+        "Enter number of speakers",
+        min_value=1,
+        max_value=10,
+        value=2
+    )
+    # File upload
+    uploaded_file = st.file_uploader("Upload audio file", type=['mp3', 'wav'])
+    # Action buttons
+    if uploaded_file:
+        if st.button("Diarize"):
+            with st.spinner("Processing audio..."):
+                # Save uploaded file temporarily
+                with NamedTemporaryFile(delete=False, suffix='.mp3') as tmp_file:
+                    tmp_file.write(uploaded_file.getvalue())
+                    temp_path = tmp_file.name
+                try:
+                    # Configure Gemini
+                    genai.configure(api_key=os.environ["GEMINI_API_KEY"])
+                    # Upload to Gemini
+                    gemini_file = genai.upload_file(temp_path, mime_type="audio/mpeg")
+                    # Create diarization model and process
+                    diarization_config = {
+                        "temperature": 0.5,
+                        "top_p": 0.95, #0.95
+                        "top_k": 40,
+                        "max_output_tokens": 8192,
+                        "response_mime_type": "application/json",
+                    }
+                    diarization_model = genai.GenerativeModel(
+                        model_name="gemini-2.0-flash-exp",
+                        generation_config=diarization_config,
+                        safety_settings={
+                            'HATE': 'BLOCK_NONE',
+                            'HARASSMENT': 'BLOCK_NONE',
+                            'SEXUAL': 'BLOCK_NONE',
+                            'DANGEROUS': 'BLOCK_NONE'
+                        }
+                    )
+                    # Process diarization
+                    chat_session = diarization_model.start_chat(
+                        history=[{"role": "user", "parts": [gemini_file]}]
+                    )
+                    response = chat_session.send_message(
+                        f"Generate meeting diarization of the meeting audio record provided in the file. "
+                        f"The meeting may be in a foreign language, expect a mixture of words in local language "
+                        f"and words in english. Provided audio has {num_speakers} speakers. "
+                        f"Accurately name the speakers or use labels like SPEAKER_01, SPEAKER_02, SPEAKER_03 and so on. "
+                        f"Provide a structured JSON output. timestamp (hh:mm:ss), speaker (name only), "
+                        f"speech (transcription). Do not transcribe filler words."
+                    )
+                    json_data = json.loads(response.text)
+                    formatted_output = ""
+                    for item in json_data:
+                        formatted_output += f"{item['timestamp']} - {item['speaker']}: {item['speech']}\n\n"
+                    st.session_state.diarization_output = formatted_output
+                except Exception as e:
+                    st.error(f"Error processing audio: {str(e)}")
+                finally:
+                    # Clean up temp file
+                    os.unlink(temp_path)
+        if st.button("Summarize") and st.session_state.diarization_output:
+            with st.spinner("Generating summary..."):
+                try:
+                    # Configure summarization model
+                    summarization_config = {
+                        "temperature": 0.25,
+                        "top_p": 0.95,
+                        "top_k": 40,
+                        "max_output_tokens": 8192,
+                        "response_schema": content.Schema(
+                            type=content.Type.OBJECT,
+                            enum=[],
+                            required=["summary"],
+                            properties={
+                                "summary": content.Schema(
+                                    type=content.Type.STRING,
+                                ),
+                            },
+                        ),
+                        "response_mime_type": "application/json",
+                    }
+                    summarization_model = genai.GenerativeModel(
+                        model_name="gemini-2.0-flash-exp",
+                        generation_config=summarization_config,
+                        safety_settings={
+                            'HATE': 'BLOCK_NONE',
+                            'HARASSMENT': 'BLOCK_NONE',
+                            'SEXUAL': 'BLOCK_NONE',
+                            'DANGEROUS': 'BLOCK_NONE'
+                        }
+                    )
+                    # Generate summary
+                    chat_session = summarization_model.start_chat(
+                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
+                    )
+                    response = chat_session.send_message(
+                        f"Generate a detailed summarization of the meeting, provide information on "
+                        f"the topic of the meeting, agenda, things discussed and future plans if any mentioned. "
+                        f"Provide structured output with only one tag 'summary'. Generate response in {language}."
+                    )
+                    json_data = json.loads(response.text)
+                    summary = json_data.get('summary', "No summary found.")
+                    st.session_state.chat_history.append(("Summary", summary))
+                except Exception as e:
+                    st.error(f"Error generating summary: {str(e)}")
+# Main chat interface
+st.title("Meeting Audio Analyzer")
+# Diarization output in collapsible section
+if st.session_state.diarization_output:
+    with st.expander("Diarization Output", expanded=False):
+        st.text_area("Transcript", st.session_state.diarization_output, height=300)
+# Chat history
+for role, message in st.session_state.chat_history:
+    if role == "User":
+        st.write(f"User: {message}")
+    elif role == "Bot":
+        st.write(f"Bot: {message}")
+    else:  # Summary
+        st.write("Meeting Summary:")
+        st.write(message)
+# Question input
+if st.session_state.diarization_output:
+    question = st.text_input("Type in your question")
+    if st.button("Send"):
+        if question:
+            # Add user question to chat history
+            st.session_state.chat_history.append(("User", question))
+            with st.spinner("Generating response..."):
+                try:
+                    # Configure QnA model
+                    qna_config = {
+                        "temperature": 0.25,
+                        "top_p": 0.95,
+                        "top_k": 40,
+                        "max_output_tokens": 8192,
+                        "response_schema": content.Schema(
+                            type=content.Type.OBJECT,
+                            enum=[],
+                            required=["answer"],
+                            properties={
+                                "answer": content.Schema(
+                                    type=content.Type.STRING,
+                                ),
+                            },
+                        ),
+                        "response_mime_type": "application/json",
+                    }
+                    qna_model = genai.GenerativeModel(
+                        model_name="gemini-2.0-flash-exp",
+                        generation_config=qna_config,
+                        safety_settings={
+                            'HATE': 'BLOCK_NONE',
+                            'HARASSMENT': 'BLOCK_NONE',
+                            'SEXUAL': 'BLOCK_NONE',
+                            'DANGEROUS': 'BLOCK_NONE'
+                        }
+                    )
+                    # Generate answer
+                    chat_session = qna_model.start_chat(
+                        history=[{"role": "user", "parts": [st.session_state.diarization_output]}]
+                    )
+                    response = chat_session.send_message(
+                        f"Answer the following question based on the meeting: {question}. Generate response in {language}."
+                        f"Provide structured output with only one tag 'answer'."
+                    )
+                    json_data = json.loads(response.text)
+                    answer = json_data.get('answer', "No answer found.")
+                    # Add bot response to chat history
+                    st.session_state.chat_history.append(("Bot", answer))
+                    # Rerun to update the chat display
+                    st.rerun()
+                except Exception as e:
+                    st.error(f"Error generating answer: {str(e)}")