Update app.py
app.py
CHANGED
@@ -1,118 +1,43 @@
 import streamlit as st
-import whisper
-import torch
 from transformers import pipeline
-import spacy
-import subprocess
-from summa import keywords
-import datetime
+import whisper
+import tempfile
 import os
-from pydub import AudioSegment
-import concurrent.futures

-
-def load_models():
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    whisper_model = whisper.load_model("base", device=device)
+def transcribe_audio(audio_path):
+    model = whisper.load_model("base")  # Open-source Whisper model
+    result = model.transcribe(audio_path)
+    return result["text"]
+
+def summarize_text(text):
     summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
-
-    try:
-        nlp = spacy.load("en_core_web_sm")
-    except OSError:
-        subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"])
-        nlp = spacy.load("en_core_web_sm")
-
-    return whisper_model, summarizer, nlp, device
+    summary = summarizer(text, max_length=150, min_length=50, do_sample=False)
+    return summary[0]['summary_text']

-def split_audio(file_path, chunk_length_ms=60000):
-    audio = AudioSegment.from_file(file_path)
-    chunks = [audio[i:i + chunk_length_ms] for i in range(0, len(audio), chunk_length_ms)]
-    return chunks
+def extract_action_items(text):
+    task_generator = pipeline("text-generation", model="databricks/dolly-v2-3b")
+    prompt = f"Extract action items from the following meeting notes:\n{text}\nAction Items:"
+    tasks = task_generator(prompt, max_length=100, do_sample=True)
+    return tasks[0]['generated_text']

-def transcribe_chunk(whisper_model, chunk_path, device):
-    options = {"fp16": False} if device == "cpu" else {"fp16": True}
-    return whisper_model.transcribe(chunk_path, **options)["text"]
+st.title("Smart AI Meeting Assistant")

-def extract_action_items(text, nlp):
-    doc = nlp(text)
-    actions = []
-
-    for sent in doc.sents:
-        for token in sent:
-            if token.dep_ == "ROOT" and token.pos_ == "VERB":
-                action = {
-                    "text": sent.text,
-                    "responsible": [],
-                    "deadline": []
-                }
-
-                for ent in sent.ents:
-                    if ent.label_ == "PERSON":
-                        action["responsible"].append(ent.text)
-                    elif ent.label_ == "DATE":
-                        action["deadline"].append(ent.text)
-
-                actions.append(action)
-                break
-    return actions
+uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])

-
-def main():
+if uploaded_file is not None:
+    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_audio:
+        temp_audio.write(uploaded_file.read())
+        temp_audio_path = temp_audio.name

-    whisper_model, summarizer, nlp, device = load_models()
+    st.text("Transcribing...")
+    transcript = transcribe_audio(temp_audio_path)
+    st.text_area("Meeting Transcript:", transcript, height=200)
+    os.remove(temp_audio_path)

-    audio_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])
+    if st.button("Summarize Meeting"):
+        summary = summarize_text(transcript)
+        st.text_area("Meeting Summary:", summary, height=150)

-    if audio_file is not None:
-        file_path = "temp_audio.wav"
-
-        with open(file_path, "wb") as f:
-            f.write(audio_file.getbuffer())
-
-        st.subheader("Meeting Transcription")
-        with st.spinner("Transcribing audio..."):
-            chunks = split_audio(file_path)
-            chunk_paths = []
-
-            for i, chunk in enumerate(chunks):
-                chunk_path = f"chunk_{i}.wav"
-                chunk.export(chunk_path, format="wav")
-                chunk_paths.append(chunk_path)
-
-            with concurrent.futures.ThreadPoolExecutor() as executor:
-                transcripts = list(executor.map(lambda cp: transcribe_chunk(whisper_model, cp, device), chunk_paths))
-
-            transcript = " ".join(transcripts)
-
-        st.write(transcript)
-        os.remove(file_path)
-
-        st.subheader("Meeting Summary")
-        with st.spinner("Generating summary..."):
-            truncated_text = transcript[:1024]
-            summary = summarizer(truncated_text, max_length=150, min_length=50)[0]['summary_text']
-            st.write(summary)
-
-        st.subheader("📋 Action Items")
-        actions = extract_action_items(transcript, nlp)
-
-        if not actions:
-            st.write("No action items detected")
-        else:
-            for i, action in enumerate(actions, 1):
-                responsible = ", ".join(action["responsible"]) or "Unassigned"
-                deadline = ", ".join(action["deadline"]) or "No deadline"
-                st.markdown(f"""
-                **Action {i}**
-                - Task: {action["text"]}
-                - Responsible: {responsible}
-                - Deadline: {deadline}
-                """)
-
-        st.subheader("🔑 Key Terms")
-        key_phrases_result = keywords.keywords(transcript) or ""
-        key_phrases = [kp.strip() for kp in key_phrases_result.split("\n") if kp.strip()]
-        st.write(", ".join(key_phrases) if key_phrases else "No key terms extracted")
-
-if __name__ == "__main__":
-    main()
+    if st.button("Generate Action Items"):
+        action_items = extract_action_items(transcript)
+        st.text_area("Action Items:", action_items, height=150)
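A note on the new version: `whisper.load_model("base")` runs inside `transcribe_audio`, and Streamlit re-executes the entire script on every widget interaction, so the model is reloaded on each rerun. A minimal sketch of caching the heavy models with `st.cache_resource` (the helper names `load_whisper` and `load_summarizer` are illustrative, not part of the diff):

```python
import streamlit as st
import whisper
from transformers import pipeline

@st.cache_resource  # created once per server process, reused across reruns
def load_whisper():
    return whisper.load_model("base")

@st.cache_resource
def load_summarizer():
    return pipeline("summarization", model="facebook/bart-large-cnn")

def transcribe_audio(audio_path):
    # Reuse the cached model instead of reloading it on every call.
    return load_whisper().transcribe(audio_path)["text"]
```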
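Relatedly, because each button click triggers a full rerun, the uploaded file is re-transcribed every time "Summarize Meeting" or "Generate Action Items" is pressed. One way to avoid that is to stash the transcript in `st.session_state`; the sketch below also derives the temp-file suffix from the upload's real name instead of hard-coding `.mp3`. It assumes the `transcribe_audio` helper from the diff:

```python
import os
import tempfile
import streamlit as st

uploaded_file = st.file_uploader("Upload an audio file", type=["mp3", "wav", "m4a"])

if uploaded_file is not None and "transcript" not in st.session_state:
    # Keep the original extension so ffmpeg sees the right container.
    suffix = os.path.splitext(uploaded_file.name)[1] or ".mp3"
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
        temp_audio.write(uploaded_file.read())
        temp_audio_path = temp_audio.name
    try:
        st.session_state["transcript"] = transcribe_audio(temp_audio_path)
    finally:
        os.remove(temp_audio_path)  # clean up even if transcription fails

if "transcript" in st.session_state:
    st.text_area("Meeting Transcript:", st.session_state["transcript"], height=200)
```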
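On summarization: the old code truncated the transcript to its first 1024 characters, while the new `summarize_text` passes the whole transcript, but `facebook/bart-large-cnn` only encodes about 1024 tokens, so long meetings get truncated anyway. A rough chunk-and-merge sketch (`summarize_long_text` is a hypothetical helper building on the cached pipeline above; the ~4 characters per token ratio is a heuristic):

```python
def summarize_long_text(text, chunk_chars=3000):
    # ~3000 characters keeps each chunk safely under BART's
    # 1024-token encoder limit at roughly 4 characters per token.
    chunks = [text[i:i + chunk_chars] for i in range(0, len(text), chunk_chars)]
    summarizer = load_summarizer()
    partial = [
        summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]["summary_text"]
        for chunk in chunks
    ]
    # Concatenate per-chunk summaries; a second summarization pass over
    # the joined text would tighten the result further.
    return " ".join(partial)
```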
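Finally, in `extract_action_items`, `max_length=100` counts the prompt tokens as well, so any realistic transcript exceeds the budget before generation even starts, and the returned `generated_text` echoes the whole prompt back into the UI. A sketch using `max_new_tokens` and `return_full_text=False` (both standard text-generation pipeline arguments); how reliably dolly-v2-3b follows this bare prompt format is a separate question:

```python
from transformers import pipeline

def extract_action_items(text):
    task_generator = pipeline("text-generation", model="databricks/dolly-v2-3b")
    prompt = f"Extract action items from the following meeting notes:\n{text}\nAction Items:"
    tasks = task_generator(
        prompt,
        max_new_tokens=150,      # bound the continuation, not prompt + continuation
        do_sample=True,
        return_full_text=False,  # drop the echoed prompt from the output
    )
    return tasks[0]["generated_text"].strip()
```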