irfansaleem48 committed
Commit cc9e28a · verified
1 Parent(s): 4e7e944

Update app.py

Files changed (1)
  1. app.py +31 -106
app.py CHANGED
@@ -1,118 +1,43 @@
  import streamlit as st
- import whisper
- import torch
  from transformers import pipeline
- import spacy
- import subprocess
- from summa import keywords
- import datetime
  import os
- from pydub import AudioSegment
- import concurrent.futures

- @st.cache_resource
- def load_models():
-     device = "cuda" if torch.cuda.is_available() else "cpu"
-     whisper_model = whisper.load_model("small").to(device)  # Using 'small' for faster speed
      summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
-     try:
-         nlp = spacy.load("en_core_web_sm")
-     except OSError:
-         subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-         nlp = spacy.load("en_core_web_sm")
-
-     return whisper_model, summarizer, nlp, device

- def split_audio(file_path, chunk_length_ms=60000):  # 60 seconds per chunk
-     audio = AudioSegment.from_file(file_path)
-     chunks = [audio[i : i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
-     return chunks

- def transcribe_chunk(whisper_model, chunk_path, device):
-     options = {"fp16": False} if device == "cpu" else {"fp16": True}
-     return whisper_model.transcribe(chunk_path, **options)["text"]

- def extract_action_items(text, nlp):
-     doc = nlp(text)
-     actions = []
-
-     for sent in doc.sents:
-         for token in sent:
-             if token.dep_ == "ROOT" and token.pos_ == "VERB":
-                 action = {
-                     "text": sent.text,
-                     "responsible": [],
-                     "deadline": []
-                 }
-
-                 for ent in sent.ents:
-                     if ent.label_ == "PERSON":
-                         action["responsible"].append(ent.text)
-                     elif ent.label_ == "DATE":
-                         action["deadline"].append(ent.text)
-
-                 actions.append(action)
-                 break
-     return actions

- def main():
-     st.title("🤖 Smart AI Meeting Assistant")

-     whisper_model, summarizer, nlp, device = load_models()

-     audio_file = st.file_uploader("Upload meeting audio", type=["wav", "mp3", "m4a", "ogg", "flac"])

-     if audio_file is not None:
-         file_path = f"uploaded_audio_{datetime.datetime.now().timestamp()}.wav"
-
-         with open(file_path, "wb") as f:
-             f.write(audio_file.getbuffer())
-
-         st.subheader("Meeting Transcription")
-         with st.spinner("Transcribing audio..."):
-             chunks = split_audio(file_path)
-             chunk_paths = []
-
-             for i, chunk in enumerate(chunks):
-                 chunk_path = f"chunk_{i}.wav"
-                 chunk.export(chunk_path, format="wav")
-                 chunk_paths.append(chunk_path)
-
-             with concurrent.futures.ThreadPoolExecutor() as executor:
-                 transcripts = list(executor.map(lambda cp: transcribe_chunk(whisper_model, cp, device), chunk_paths))
-
-             transcript = " ".join(transcripts)
-
-             st.write(transcript)
-             os.remove(file_path)
-
-         st.subheader("Meeting Summary")
-         with st.spinner("Generating summary..."):
-             truncated_text = transcript[:1024]
-             summary = summarizer(truncated_text, max_length=150, min_length=50)[0]['summary_text']
-             st.write(summary)
-
-         st.subheader("🚀 Action Items")
-         actions = extract_action_items(transcript, nlp)
-
-         if not actions:
-             st.write("No action items detected")
-         else:
-             for i, action in enumerate(actions, 1):
-                 responsible = ", ".join(action["responsible"]) or "Unassigned"
-                 deadline = ", ".join(action["deadline"]) or "No deadline"
-                 st.markdown(f"""
-                 **Action {i}**
-                 - Task: {action["text"]}
-                 - Responsible: {responsible}
-                 - Deadline: {deadline}
-                 """)
-
-         st.subheader("🔑 Key Terms")
-         key_phrases_result = keywords.keywords(transcript) or ""
-         key_phrases = [kp.strip() for kp in key_phrases_result.split("\n") if kp.strip()]
-         st.write(", ".join(key_phrases) if key_phrases else "No key terms extracted")
-
- if __name__ == "__main__":
-     main()
  import streamlit as st
  from transformers import pipeline
+ import whisper
+ import tempfile
  import os

+ def transcribe_audio(audio_path):
+     model = whisper.load_model("base")  # Open-source Whisper model
+     result = model.transcribe(audio_path)
+     return result["text"]
+
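+ # Note: facebook/bart-large-cnn accepts roughly 1024 input tokens, so very long
+ # transcripts may need to be split or truncated before summarization.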
+ def summarize_text(text):
      summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
+     summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
+     return summary[0]['summary_text']

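+ # The text-generation pipeline below loads databricks/dolly-v2-3b (a ~3B-parameter
+ # model) on every call, which can be slow, especially on CPU-only hardware.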
+ def extract_action_items(text):
+     task_generator = pipeline("text-generation", model="databricks/dolly-v2-3b")
+     prompt = f"Extract action items from the following meeting notes:\n{text}\nAction Items:"
+     tasks = task_generator(prompt, max_length=100, do_sample=True)
+     return tasks[0]['generated_text']

+ st.title("Smart AI Meeting Assistant")

+ uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])

+ if uploaded_file is not None:
+     with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
+         temp_audio.write(uploaded_file.read())
+         temp_audio_path = temp_audio.name

+     st.text("Transcribing...")
+     transcript = transcribe_audio(temp_audio_path)
+     st.text_area("Meeting Transcript:", transcript, height=200)
+     os.remove(temp_audio_path)

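+     # Streamlit reruns this script from the top on every button click, so the
+     # audio above is transcribed again before either button's branch executes.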
+     if st.button("Summarize Meeting"):
+         summary = summarize_text(transcript)
+         st.text_area("Meeting Summary:", summary, height=150)

+     if st.button("Generate Action Items"):
+         action_items = extract_action_items(transcript)
+         st.text_area("Action Items:", action_items, height=150)
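A minimal sketch, assuming the same Streamlit, Whisper, and transformers dependencies, of caching the heavy models with `@st.cache_resource` (the pattern the removed version used) so they load once per process instead of on every rerun or call; the loader function names here are illustrative, not part of this commit:

```python
import streamlit as st
import whisper
from transformers import pipeline

@st.cache_resource  # loaded once per process, reused across Streamlit reruns
def load_whisper_model():
    return whisper.load_model("base")

@st.cache_resource
def load_summarizer():
    return pipeline("summarization", model="facebook/bart-large-cnn")

def transcribe_audio(audio_path):
    return load_whisper_model().transcribe(audio_path)["text"]

def summarize_text(text):
    result = load_summarizer()(text, max_length=150, min_length=50, do_sample=False)
    return result[0]["summary_text"]
```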